diff --git "a/profile_trace/iteration_13312/rank2_trace.json" "b/profile_trace/iteration_13312/rank2_trace.json" new file mode 100644--- /dev/null +++ "b/profile_trace/iteration_13312/rank2_trace.json" @@ -0,0 +1,68515 @@ + +{ + "schemaVersion": 1, + "deviceProperties": [ + { + "id": 0, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 1, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 2, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 3, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 4, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 5, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 6, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 7, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + } + ], + "cupti_version": 22, + "cuda_runtime_version": 12040, + "cuda_driver_version": 12080, + "distributedInfo": {"backend": "nccl", "rank": 2, "world_size": 8, "pg_count": 1, "pg_config": [{"pg_name": "0", "pg_desc": "default_pg", "backend_config": "cuda:nccl", "pg_size": 8, "ranks": [0, 1, 2, 3, 4, 5, 6, 7]}], "nccl_version": "2.21.5"}, + "record_shapes": 1, + "trace_id": "E2428152D67D44569B77A97EB332E804", + "traceEvents": [ + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918106549.104, "dur": 146.287, + "args": { + "External id": 249857,"Record function id": 0, "Sequence number": 2987746, "Fwd thread id": 1, "Ev Idx": 0 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918106566.466, "dur": 117.156, + "args": { + "External id": 249858,"Sequence number": 2987746, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 1 + } + }, + { + "ph": "f", "id": 1, "pid": 4183438, "tid": 31367, "ts": 667918106566.466, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 4183438, "tid": 31367, + "ts": 667918106574.429, "dur": 77.607, + "args": { + "External id": 249859,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 2 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918106708.714, "dur": 215.995, + "args": { + "External id": 249860,"Record function id": 0, "Ev Idx": 3 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 4183438, "tid": 31367, + "ts": 667918106763.151, "dur": 90.973, + "args": { + "External id": 249861,"Record function id": 0, "Ev Idx": 4 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 4183438, "tid": 31367, + "ts": 667918106794.802, "dur": 48.455, + "args": { + "External id": 249862,"Record function id": 0, "Ev Idx": 5 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918106858.971, "dur": 2.236, + "args": { + "External id": 249863,"Sequence number": 2987745, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 6 + } + }, + { + "ph": "f", "id": 2, "pid": 4183438, "tid": 31367, "ts": 667918106858.971, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 31367, + "ts": 667918106865.030, "dur": 55.875, + "args": { + "External id": 249864,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 7 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 31367, + "ts": 667918106873.081, "dur": 47.294, + "args": { + "External id": 249865,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 8 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918106883.348, "dur": 3.260, + "args": { + "External id": 249866,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918106933.319, "dur": 17198.036, + "args": { + "External id": 249867,"Record function id": 0, "Sequence number": 2987743, "Fwd thread id": 1, "Ev Idx": 10 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918106935.327, "dur": 17185.784, + "args": { + "External id": 249868,"Sequence number": 2987743, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11 + } + }, + { + "ph": "f", "id": 3, "pid": 4183438, "tid": 31367, "ts": 667918106935.327, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918106974.488, "dur": 5.298, + "args": { + "External id": 249869,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918106984.611, "dur": 17020.153, + "args": { + "External id": 249870,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918106986.445, "dur": 17018.046, + "args": { + "External id": 249871,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 14 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918106991.928, "dur": 6.752, + "args": { + "External id": 249872,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918107000.243, "dur": 17002.213, + "args": { + "External id": 249873,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 4183438, "tid": 31367, + "ts": 667918124010.079, "dur": 0.714, + "args": { + "External id": 249874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 17 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 4183438, "tid": 31367, + "ts": 667918124013.199, "dur": 3.284, + "args": { + "External id": 249875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 18 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 4183438, "tid": 31367, + "ts": 667918124014.860, "dur": 1.234, + "args": { + "External id": 249876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 4183438, "tid": 31367, + "ts": 667918124022.366, "dur": 31.963, + "args": { + "External id": 249877,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 20 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 4183438, "tid": 31367, + "ts": 667918124063.601, "dur": 47.559, + "args": { + "External id": 249878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 4183438, "tid": 31367, + "ts": 667918124065.939, "dur": 45.017, + "args": { + "External id": 249879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 22 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 4183438, "tid": 31367, + "ts": 667918124067.337, "dur": 43.347, + "args": { + "External id": 249880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 23 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918124143.214, "dur": 17.605, + "args": { + "External id": 249881,"Record function id": 0, "Sequence number": 2987742, "Fwd thread id": 1, "Ev Idx": 24 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918124144.760, "dur": 13.840, + "args": { + "External id": 249882,"Sequence number": 2987742, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 25 + } + }, + { + "ph": "f", "id": 4, "pid": 4183438, "tid": 31367, "ts": 667918124144.760, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918124148.476, "dur": 9.906, + "args": { + "External id": 249883,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 26 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918124152.167, "dur": 6.037, + "args": { + "External id": 249884,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 27 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918124164.776, "dur": 104.819, + "args": { + "External id": 249885,"Record function id": 0, "Sequence number": 2987741, "Fwd thread id": 1, "Ev Idx": 28 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918124165.784, "dur": 96.110, + "args": { + "External id": 249886,"Sequence number": 2987741, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 29 + } + }, + { + "ph": "f", "id": 5, "pid": 4183438, "tid": 31367, "ts": 667918124165.784, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 4183438, "tid": 31367, + "ts": 667918124169.640, "dur": 91.774, + "args": { + "External id": 249887,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], [], [], []], "Ev Idx": 30 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 31367, + "ts": 667918124175.483, "dur": 43.903, + "args": { + "External id": 249888,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 1024]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 31 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918124179.839, "dur": 4.579, + "args": { + "External id": 249889,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 1024]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 32 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 31367, + "ts": 667918124185.752, "dur": 33.317, + "args": { + "External id": 249890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 33 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 31367, + "ts": 667918124188.396, "dur": 30.007, + "args": { + "External id": 249891,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 34 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918124224.379, "dur": 5.164, + "args": { + "External id": 249892,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], [], []], "Ev Idx": 35 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918124227.648, "dur": 1.498, + "args": { + "External id": 249893,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[4194304, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4194304, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], []], "Ev Idx": 36 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918124230.665, "dur": 29.838, + "args": { + "External id": 249894,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 37 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918124275.724, "dur": 65.189, + "args": { + "External id": 249895,"Record function id": 0, "Sequence number": 2987740, "Fwd thread id": 1, "Ev Idx": 38 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918124276.933, "dur": 60.080, + "args": { + "External id": 249896,"Sequence number": 2987740, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 39 + } + }, + { + "ph": "f", "id": 6, "pid": 4183438, "tid": 31367, "ts": 667918124276.933, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 4183438, "tid": 31367, + "ts": 667918124280.370, "dur": 56.390, + "args": { + "External id": 249897,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "2", "3"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], []], "Ev Idx": 40 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 31367, + "ts": 667918124285.667, "dur": 21.990, + "args": { + "External id": 249898,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 41 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918124287.213, "dur": 2.965, + "args": { + "External id": 249899,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 42 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 31367, + "ts": 667918124290.964, "dur": 16.429, + "args": { + "External id": 249900,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 43 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 31367, + "ts": 667918124294.032, "dur": 12.856, + "args": { + "External id": 249901,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], []], "Ev Idx": 44 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 4183438, "tid": 31367, + "ts": 667918124311.036, "dur": 5.906, + "args": { + "External id": 249902,"Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], []], "Input Dims": [[16, 4096, 4, 1024], [], []], "Ev Idx": 45 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918124315.168, "dur": 1.093, + "args": { + "External id": 249903,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[16777216, 4096, 1]", "3072"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 46 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918124317.776, "dur": 18.325, + "args": { + "External id": 249904,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 47 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918124344.968, "dur": 108.535, + "args": { + "External id": 249905,"Record function id": 0, "Sequence number": 2987739, "Fwd thread id": 1, "Ev Idx": 48 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918124345.928, "dur": 104.556, + "args": { + "External id": 249906,"Sequence number": 2987739, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 49 + } + }, + { + "ph": "f", "id": 7, "pid": 4183438, "tid": 31367, "ts": 667918124345.928, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 4183438, "tid": 31367, + "ts": 667918124349.195, "dur": 100.933, + "args": { + "External id": 249907,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], [], []], "Ev Idx": 50 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 31367, + "ts": 667918124352.658, "dur": 18.736, + "args": { + "External id": 249908,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 51 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918124353.698, "dur": 2.401, + "args": { + "External id": 249909,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 52 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 31367, + "ts": 667918124356.586, "dur": 14.541, + "args": { + "External id": 249910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 53 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 31367, + "ts": 667918124357.859, "dur": 12.918, + "args": { + "External id": 249911,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], []], "Ev Idx": 54 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918124374.807, "dur": 4.659, + "args": { + "External id": 249912,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], []], "Ev Idx": 55 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918124376.440, "dur": 2.849, + "args": { + "External id": 249913,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "[16777216, 4096, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 56 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918124380.374, "dur": 69.086, + "args": { + "External id": 249914,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], [16, 4096, 4, 1024], []], "Ev Idx": 57 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918124459.593, "dur": 67.794, + "args": { + "External id": 249915,"Record function id": 0, "Sequence number": 2987738, "Fwd thread id": 1, "Ev Idx": 58 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918124460.701, "dur": 63.920, + "args": { + "External id": 249916,"Sequence number": 2987738, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 59 + } + }, + { + "ph": "f", "id": 8, "pid": 4183438, "tid": 31367, "ts": 667918124460.701, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 4183438, "tid": 31367, + "ts": 667918124461.640, "dur": 62.742, + "args": { + "External id": 249917,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], [], []], "Ev Idx": 60 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 31367, + "ts": 667918124464.146, "dur": 15.869, + "args": { + "External id": 249918,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 61 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918124464.955, "dur": 1.758, + "args": { + "External id": 249919,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 62 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 31367, + "ts": 667918124467.223, "dur": 12.551, + "args": { + "External id": 249920,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 63 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 31367, + "ts": 667918124468.333, "dur": 11.113, + "args": { + "External id": 249921,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], []], "Ev Idx": 64 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918124480.764, "dur": 4.041, + "args": { + "External id": 249922,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], []], "Ev Idx": 65 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918124484.054, "dur": 0.621, + "args": { + "External id": 249923,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "[16777216, 4096, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 66 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918124487.457, "dur": 36.293, + "args": { + "External id": 249924,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], [16, 4096, 4, 1024], []], "Ev Idx": 67 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918124531.604, "dur": 36.848, + "args": { + "External id": 249925,"Record function id": 0, "Sequence number": 2987737, "Fwd thread id": 1, "Ev Idx": 68 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918124532.681, "dur": 0.688, + "args": { + "External id": 249926,"Sequence number": 2987737, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 69 + } + }, + { + "ph": "f", "id": 9, "pid": 4183438, "tid": 31367, "ts": 667918124532.681, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 31367, + "ts": 667918124535.380, "dur": 29.847, + "args": { + "External id": 249927,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 70 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 31367, + "ts": 667918124537.271, "dur": 27.475, + "args": { + "External id": 249928,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 71 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918124543.698, "dur": 2.610, + "args": { + "External id": 249929,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 72 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918124573.487, "dur": 1068.010, + "args": { + "External id": 249930,"Record function id": 0, "Sequence number": 2987735, "Fwd thread id": 1, "Ev Idx": 73 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918124574.813, "dur": 1035.818, + "args": { + "External id": 249931,"Sequence number": 2987735, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 74 + } + }, + { + "ph": "f", "id": 10, "pid": 4183438, "tid": 31367, "ts": 667918124574.813, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918124609.752, "dur": 2.723, + "args": { + "External id": 249932,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 75 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918124614.673, "dur": 912.186, + "args": { + "External id": 249933,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 76 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918124616.303, "dur": 910.288, + "args": { + "External id": 249934,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 77 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918124619.046, "dur": 5.281, + "args": { + "External id": 249935,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 78 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918124625.576, "dur": 899.779, + "args": { + "External id": 249936,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 79 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 4183438, "tid": 31367, + "ts": 667918125529.802, "dur": 0.340, + "args": { + "External id": 249937,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 80 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 4183438, "tid": 31367, + "ts": 667918125531.239, "dur": 3.967, + "args": { + "External id": 249938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 81 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 4183438, "tid": 31367, + "ts": 667918125534.027, "dur": 1.047, + "args": { + "External id": 249939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 82 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 4183438, "tid": 31367, + "ts": 667918125539.223, "dur": 22.319, + "args": { + "External id": 249940,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 83 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 4183438, "tid": 31367, + "ts": 667918125566.610, "dur": 37.113, + "args": { + "External id": 249941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 84 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 4183438, "tid": 31367, + "ts": 667918125567.718, "dur": 35.797, + "args": { + "External id": 249942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 85 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 4183438, "tid": 31367, + "ts": 667918125568.797, "dur": 34.487, + "args": { + "External id": 249943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 86 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 31367, + "ts": 667918125619.337, "dur": 18.718, + "args": { + "External id": 249944,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 87 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918125649.693, "dur": 41.608, + "args": { + "External id": 249945,"Record function id": 0, "Sequence number": 2987734, "Fwd thread id": 1, "Ev Idx": 88 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918125650.872, "dur": 37.412, + "args": { + "External id": 249946,"Sequence number": 2987734, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 89 + } + }, + { + "ph": "f", "id": 11, "pid": 4183438, "tid": 31367, "ts": 667918125650.872, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918125680.390, "dur": 7.686, + "args": { + "External id": 249947,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 90 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918125683.785, "dur": 3.994, + "args": { + "External id": 249948,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 91 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918125696.437, "dur": 63.282, + "args": { + "External id": 249949,"Record function id": 0, "Sequence number": 2987733, "Fwd thread id": 1, "Ev Idx": 92 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918125697.409, "dur": 57.571, + "args": { + "External id": 249950,"Sequence number": 2987733, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 93 + } + }, + { + "ph": "f", "id": 12, "pid": 4183438, "tid": 31367, "ts": 667918125697.409, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 4183438, "tid": 31367, + "ts": 667918125698.918, "dur": 55.682, + "args": { + "External id": 249951,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], [], [], []], "Ev Idx": 94 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 31367, + "ts": 667918125702.902, "dur": 25.027, + "args": { + "External id": 249952,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 1024]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 95 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918125704.253, "dur": 5.040, + "args": { + "External id": 249953,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 1024]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 96 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 31367, + "ts": 667918125710.088, "dur": 17.582, + "args": { + "External id": 249954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 97 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 31367, + "ts": 667918125711.086, "dur": 16.136, + "args": { + "External id": 249955,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 98 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918125729.422, "dur": 2.771, + "args": { + "External id": 249956,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], [], []], "Ev Idx": 99 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918125731.169, "dur": 0.760, + "args": { + "External id": 249957,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[4194304, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4194304, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], []], "Ev Idx": 100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918125733.162, "dur": 20.514, + "args": { + "External id": 249958,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918125765.909, "dur": 54.294, + "args": { + "External id": 249959,"Record function id": 0, "Sequence number": 2987732, "Fwd thread id": 1, "Ev Idx": 102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918125766.820, "dur": 50.844, + "args": { + "External id": 249960,"Sequence number": 2987732, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 103 + } + }, + { + "ph": "f", "id": 13, "pid": 4183438, "tid": 31367, "ts": 667918125766.820, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 4183438, "tid": 31367, + "ts": 667918125769.427, "dur": 48.029, + "args": { + "External id": 249961,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "2", "2"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], []], "Ev Idx": 104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 31367, + "ts": 667918125771.087, "dur": 17.879, + "args": { + "External id": 249962,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918125771.791, "dur": 2.265, + "args": { + "External id": 249963,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 31367, + "ts": 667918125774.694, "dur": 14.005, + "args": { + "External id": 249964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 31367, + "ts": 667918125775.405, "dur": 12.972, + "args": { + "External id": 249965,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], []], "Ev Idx": 108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 4183438, "tid": 31367, + "ts": 667918125790.090, "dur": 7.346, + "args": { + "External id": 249966,"Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], []], "Input Dims": [[16, 4096, 4, 1024], [], []], "Ev Idx": 109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918125796.105, "dur": 0.877, + "args": { + "External id": 249967,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[16777216, 4096, 1]", "2048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918125798.238, "dur": 18.691, + "args": { + "External id": 249968,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918125824.225, "dur": 85.139, + "args": { + "External id": 249969,"Record function id": 0, "Sequence number": 2987731, "Fwd thread id": 1, "Ev Idx": 112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918125825.225, "dur": 81.086, + "args": { + "External id": 249970,"Sequence number": 2987731, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 113 + } + }, + { + "ph": "f", "id": 14, "pid": 4183438, "tid": 31367, "ts": 667918125825.225, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 4183438, "tid": 31367, + "ts": 667918125826.923, "dur": 79.100, + "args": { + "External id": 249971,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], [], []], "Ev Idx": 114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 31367, + "ts": 667918125828.059, "dur": 17.017, + "args": { + "External id": 249972,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918125829.090, "dur": 2.031, + "args": { + "External id": 249973,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 31367, + "ts": 667918125831.790, "dur": 13.030, + "args": { + "External id": 249974,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 31367, + "ts": 667918125832.441, "dur": 12.062, + "args": { + "External id": 249975,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], []], "Ev Idx": 118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918125846.037, "dur": 2.954, + "args": { + "External id": 249976,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], []], "Ev Idx": 119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918125848.221, "dur": 0.651, + "args": { + "External id": 249977,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "[16777216, 4096, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918125849.625, "dur": 55.759, + "args": { + "External id": 249978,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], [16, 4096, 4, 1024], []], "Ev Idx": 121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918125914.675, "dur": 76.877, + "args": { + "External id": 249979,"Record function id": 0, "Sequence number": 2987730, "Fwd thread id": 1, "Ev Idx": 122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918125915.443, "dur": 58.150, + "args": { + "External id": 249980,"Sequence number": 2987730, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 123 + } + }, + { + "ph": "f", "id": 15, "pid": 4183438, "tid": 31367, "ts": 667918125915.443, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 4183438, "tid": 31367, + "ts": 667918125917.803, "dur": 55.561, + "args": { + "External id": 249981,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], [], []], "Ev Idx": 124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 31367, + "ts": 667918125918.378, "dur": 14.671, + "args": { + "External id": 249982,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918125918.927, "dur": 1.818, + "args": { + "External id": 249983,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 31367, + "ts": 667918125921.217, "dur": 11.580, + "args": { + "External id": 249984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 31367, + "ts": 667918125921.743, "dur": 10.754, + "args": { + "External id": 249985,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], []], "Ev Idx": 128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918125933.924, "dur": 1.754, + "args": { + "External id": 249986,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], []], "Ev Idx": 129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918125934.892, "dur": 0.636, + "args": { + "External id": 249987,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "[16777216, 4096, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918125936.217, "dur": 36.490, + "args": { + "External id": 249988,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], [16, 4096, 4, 1024], []], "Ev Idx": 131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 31367, + "ts": 667918125977.506, "dur": 12.828, + "args": { + "External id": 249989,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], [16, 4096, 4, 1024], []], "Ev Idx": 132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918125995.575, "dur": 36.077, + "args": { + "External id": 249990,"Record function id": 0, "Sequence number": 2987729, "Fwd thread id": 1, "Ev Idx": 133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918125998.588, "dur": 0.541, + "args": { + "External id": 249991,"Sequence number": 2987729, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 134 + } + }, + { + "ph": "f", "id": 16, "pid": 4183438, "tid": 31367, "ts": 667918125998.588, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 31367, + "ts": 667918126000.886, "dur": 28.077, + "args": { + "External id": 249992,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 31367, + "ts": 667918126002.445, "dur": 26.102, + "args": { + "External id": 249993,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918126010.384, "dur": 0.477, + "args": { + "External id": 249994,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918126035.944, "dur": 1481.107, + "args": { + "External id": 249995,"Record function id": 0, "Sequence number": 2987727, "Fwd thread id": 1, "Ev Idx": 138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918126038.934, "dur": 1453.616, + "args": { + "External id": 249996,"Sequence number": 2987727, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 139 + } + }, + { + "ph": "f", "id": 17, "pid": 4183438, "tid": 31367, "ts": 667918126038.934, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918126065.994, "dur": 2.046, + "args": { + "External id": 249997,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918126070.045, "dur": 1337.562, + "args": { + "External id": 249998,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918126071.446, "dur": 1335.894, + "args": { + "External id": 249999,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918126073.261, "dur": 3.211, + "args": { + "External id": 250000,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918126077.326, "dur": 1328.912, + "args": { + "External id": 250001,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 4183438, "tid": 31367, + "ts": 667918127410.441, "dur": 0.327, + "args": { + "External id": 250002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 4183438, "tid": 31367, + "ts": 667918127411.891, "dur": 2.278, + "args": { + "External id": 250003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 4183438, "tid": 31367, + "ts": 667918127413.185, "dur": 0.871, + "args": { + "External id": 250004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 4183438, "tid": 31367, + "ts": 667918127417.865, "dur": 20.394, + "args": { + "External id": 250005,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 4183438, "tid": 31367, + "ts": 667918127446.162, "dur": 39.202, + "args": { + "External id": 250006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 4183438, "tid": 31367, + "ts": 667918127447.165, "dur": 37.963, + "args": { + "External id": 250007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 4183438, "tid": 31367, + "ts": 667918127448.137, "dur": 36.740, + "args": { + "External id": 250008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 31367, + "ts": 667918127499.671, "dur": 14.342, + "args": { + "External id": 250009,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918127526.716, "dur": 10.203, + "args": { + "External id": 250010,"Record function id": 0, "Sequence number": 2987726, "Fwd thread id": 1, "Ev Idx": 153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918127528.213, "dur": 6.688, + "args": { + "External id": 250011,"Sequence number": 2987726, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 154 + } + }, + { + "ph": "f", "id": 18, "pid": 4183438, "tid": 31367, "ts": 667918127528.213, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918127530.566, "dur": 4.107, + "args": { + "External id": 250012,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918127531.723, "dur": 2.835, + "args": { + "External id": 250013,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918127540.363, "dur": 57.466, + "args": { + "External id": 250014,"Record function id": 0, "Sequence number": 2987725, "Fwd thread id": 1, "Ev Idx": 157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918127541.042, "dur": 52.917, + "args": { + "External id": 250015,"Sequence number": 2987725, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 158 + } + }, + { + "ph": "f", "id": 19, "pid": 4183438, "tid": 31367, "ts": 667918127541.042, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 4183438, "tid": 31367, + "ts": 667918127544.000, "dur": 49.580, + "args": { + "External id": 250016,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], [], [], []], "Ev Idx": 159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 31367, + "ts": 667918127548.362, "dur": 17.552, + "args": { + "External id": 250017,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 1024]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918127549.520, "dur": 2.570, + "args": { + "External id": 250018,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 1024]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 31367, + "ts": 667918127552.982, "dur": 12.666, + "args": { + "External id": 250019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 31367, + "ts": 667918127553.975, "dur": 11.333, + "args": { + "External id": 250020,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918127567.324, "dur": 4.194, + "args": { + "External id": 250021,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], [], []], "Ev Idx": 164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918127570.390, "dur": 0.846, + "args": { + "External id": 250022,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[4194304, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4194304, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], []], "Ev Idx": 165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918127572.310, "dur": 20.636, + "args": { + "External id": 250023,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918127601.677, "dur": 93.697, + "args": { + "External id": 250024,"Record function id": 0, "Sequence number": 2987724, "Fwd thread id": 1, "Ev Idx": 167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918127602.690, "dur": 88.312, + "args": { + "External id": 250025,"Sequence number": 2987724, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 168 + } + }, + { + "ph": "f", "id": 20, "pid": 4183438, "tid": 31367, "ts": 667918127602.690, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 4183438, "tid": 31367, + "ts": 667918127604.238, "dur": 86.410, + "args": { + "External id": 250026,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "2", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], []], "Ev Idx": 169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 31367, + "ts": 667918127605.587, "dur": 18.217, + "args": { + "External id": 250027,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918127608.532, "dur": 2.598, + "args": { + "External id": 250028,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 31367, + "ts": 667918127611.830, "dur": 11.706, + "args": { + "External id": 250029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 31367, + "ts": 667918127612.739, "dur": 10.436, + "args": { + "External id": 250030,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], []], "Ev Idx": 173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 4183438, "tid": 31367, + "ts": 667918127624.764, "dur": 6.959, + "args": { + "External id": 250031,"Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], []], "Input Dims": [[16, 4096, 4, 1024], [], []], "Ev Idx": 174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918127628.571, "dur": 2.705, + "args": { + "External id": 250032,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[16777216, 4096, 1]", "1024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918127632.203, "dur": 57.390, + "args": { + "External id": 250033,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918127701.372, "dur": 75.595, + "args": { + "External id": 250034,"Record function id": 0, "Sequence number": 2987723, "Fwd thread id": 1, "Ev Idx": 177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918127702.605, "dur": 71.697, + "args": { + "External id": 250035,"Sequence number": 2987723, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 178 + } + }, + { + "ph": "f", "id": 21, "pid": 4183438, "tid": 31367, "ts": 667918127702.605, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 4183438, "tid": 31367, + "ts": 667918127704.289, "dur": 69.708, + "args": { + "External id": 250036,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], [], []], "Ev Idx": 179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 31367, + "ts": 667918127705.483, "dur": 22.568, + "args": { + "External id": 250037,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918127708.138, "dur": 2.855, + "args": { + "External id": 250038,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 31367, + "ts": 667918127711.709, "dur": 16.069, + "args": { + "External id": 250039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 31367, + "ts": 667918127712.304, "dur": 15.079, + "args": { + "External id": 250040,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], []], "Ev Idx": 183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918127729.040, "dur": 3.702, + "args": { + "External id": 250041,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], []], "Ev Idx": 184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918127731.770, "dur": 0.703, + "args": { + "External id": 250042,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "[16777216, 4096, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918127733.518, "dur": 39.730, + "args": { + "External id": 250043,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], [16, 4096, 4, 1024], []], "Ev Idx": 186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918127780.657, "dur": 82.911, + "args": { + "External id": 250044,"Record function id": 0, "Sequence number": 2987722, "Fwd thread id": 1, "Ev Idx": 187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918127781.765, "dur": 62.061, + "args": { + "External id": 250045,"Sequence number": 2987722, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 188 + } + }, + { + "ph": "f", "id": 22, "pid": 4183438, "tid": 31367, "ts": 667918127781.765, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 4183438, "tid": 31367, + "ts": 667918127784.987, "dur": 58.589, + "args": { + "External id": 250046,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], [], []], "Ev Idx": 189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 31367, + "ts": 667918127785.899, "dur": 16.550, + "args": { + "External id": 250047,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918127788.655, "dur": 1.690, + "args": { + "External id": 250048,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 31367, + "ts": 667918127790.968, "dur": 11.214, + "args": { + "External id": 250049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 31367, + "ts": 667918127791.759, "dur": 10.100, + "args": { + "External id": 250050,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], []], "Ev Idx": 193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918127803.277, "dur": 4.034, + "args": { + "External id": 250051,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], []], "Ev Idx": 194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918127806.289, "dur": 0.833, + "args": { + "External id": 250052,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "[16777216, 4096, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918127807.835, "dur": 35.145, + "args": { + "External id": 250053,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], [16, 4096, 4, 1024], []], "Ev Idx": 196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 31367, + "ts": 667918127847.203, "dur": 15.096, + "args": { + "External id": 250054,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], [16, 4096, 4, 1024], []], "Ev Idx": 197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918127867.202, "dur": 26.727, + "args": { + "External id": 250055,"Record function id": 0, "Sequence number": 2987721, "Fwd thread id": 1, "Ev Idx": 198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918127868.033, "dur": 0.578, + "args": { + "External id": 250056,"Sequence number": 2987721, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 199 + } + }, + { + "ph": "f", "id": 23, "pid": 4183438, "tid": 31367, "ts": 667918127868.033, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 31367, + "ts": 667918127869.957, "dur": 19.524, + "args": { + "External id": 250057,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 31367, + "ts": 667918127871.361, "dur": 17.642, + "args": { + "External id": 250058,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918127875.122, "dur": 0.379, + "args": { + "External id": 250059,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918127897.745, "dur": 1458.223, + "args": { + "External id": 250060,"Record function id": 0, "Sequence number": 2987720, "Fwd thread id": 1, "Ev Idx": 203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918127909.824, "dur": 1420.294, + "args": { + "External id": 250061,"Sequence number": 2987720, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 204 + } + }, + { + "ph": "f", "id": 24, "pid": 4183438, "tid": 31367, "ts": 667918127909.824, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918127935.214, "dur": 1.883, + "args": { + "External id": 250062,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918127938.957, "dur": 1309.050, + "args": { + "External id": 250063,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918127940.057, "dur": 1307.712, + "args": { + "External id": 250064,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918127943.527, "dur": 3.133, + "args": { + "External id": 250065,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918127947.552, "dur": 1299.160, + "args": { + "External id": 250066,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 4183438, "tid": 31367, + "ts": 667918129250.897, "dur": 0.478, + "args": { + "External id": 250067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 4183438, "tid": 31367, + "ts": 667918129252.380, "dur": 2.415, + "args": { + "External id": 250068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 4183438, "tid": 31367, + "ts": 667918129253.921, "dur": 0.756, + "args": { + "External id": 250069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 4183438, "tid": 31367, + "ts": 667918129258.320, "dur": 21.044, + "args": { + "External id": 250070,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 4183438, "tid": 31367, + "ts": 667918129283.849, "dur": 39.757, + "args": { + "External id": 250071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 4183438, "tid": 31367, + "ts": 667918129284.830, "dur": 38.560, + "args": { + "External id": 250072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 4183438, "tid": 31367, + "ts": 667918129286.212, "dur": 36.895, + "args": { + "External id": 250073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 31367, + "ts": 667918129339.235, "dur": 13.193, + "args": { + "External id": 250074,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918129368.208, "dur": 13.314, + "args": { + "External id": 250075,"Record function id": 0, "Ev Idx": 218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918129370.864, "dur": 8.623, + "args": { + "External id": 250076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918129374.499, "dur": 4.066, + "args": { + "External id": 250077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918129375.277, "dur": 3.163, + "args": { + "External id": 250078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918129384.819, "dur": 10.845, + "args": { + "External id": 250079,"Record function id": 0, "Sequence number": 2987719, "Fwd thread id": 1, "Ev Idx": 222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918129385.743, "dur": 7.725, + "args": { + "External id": 250080,"Sequence number": 2987719, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 223 + } + }, + { + "ph": "f", "id": 25, "pid": 4183438, "tid": 31367, "ts": 667918129385.743, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918129388.068, "dur": 5.218, + "args": { + "External id": 250081,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918129390.662, "dur": 2.488, + "args": { + "External id": 250082,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918129399.145, "dur": 55.087, + "args": { + "External id": 250083,"Record function id": 0, "Sequence number": 2987718, "Fwd thread id": 1, "Ev Idx": 226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918129400.011, "dur": 50.180, + "args": { + "External id": 250084,"Sequence number": 2987718, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 227 + } + }, + { + "ph": "f", "id": 26, "pid": 4183438, "tid": 31367, "ts": 667918129400.011, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 4183438, "tid": 31367, + "ts": 667918129401.544, "dur": 48.301, + "args": { + "External id": 250085,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], [], [], []], "Ev Idx": 228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 31367, + "ts": 667918129405.362, "dur": 17.359, + "args": { + "External id": 250086,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 1024]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918129406.348, "dur": 2.458, + "args": { + "External id": 250087,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 1024]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 31367, + "ts": 667918129409.579, "dur": 12.873, + "args": { + "External id": 250088,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 31367, + "ts": 667918129410.544, "dur": 11.443, + "args": { + "External id": 250089,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918129424.092, "dur": 4.738, + "args": { + "External id": 250090,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], [], []], "Ev Idx": 233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918129427.737, "dur": 0.807, + "args": { + "External id": 250091,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[4194304, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4194304, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], []], "Ev Idx": 234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918129429.550, "dur": 19.646, + "args": { + "External id": 250092,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918129459.866, "dur": 65.282, + "args": { + "External id": 250093,"Record function id": 0, "Sequence number": 2987717, "Fwd thread id": 1, "Ev Idx": 236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918129461.028, "dur": 61.796, + "args": { + "External id": 250094,"Sequence number": 2987717, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 237 + } + }, + { + "ph": "f", "id": 27, "pid": 4183438, "tid": 31367, "ts": 667918129461.028, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 4183438, "tid": 31367, + "ts": 667918129462.791, "dur": 59.786, + "args": { + "External id": 250095,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "2", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], []], "Ev Idx": 238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 31367, + "ts": 667918129463.982, "dur": 30.100, + "args": { + "External id": 250096,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918129470.336, "dur": 2.604, + "args": { + "External id": 250097,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 31367, + "ts": 667918129473.723, "dur": 20.105, + "args": { + "External id": 250098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 31367, + "ts": 667918129475.025, "dur": 18.454, + "args": { + "External id": 250099,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], []], "Ev Idx": 242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 4183438, "tid": 31367, + "ts": 667918129495.352, "dur": 3.775, + "args": { + "External id": 250100,"Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], []], "Input Dims": [[16, 4096, 4, 1024], [], []], "Ev Idx": 243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918129497.887, "dur": 0.785, + "args": { + "External id": 250101,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918129499.681, "dur": 22.305, + "args": { + "External id": 250102,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918129528.743, "dur": 76.043, + "args": { + "External id": 250103,"Record function id": 0, "Sequence number": 2987716, "Fwd thread id": 1, "Ev Idx": 246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918129529.674, "dur": 72.623, + "args": { + "External id": 250104,"Sequence number": 2987716, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 247 + } + }, + { + "ph": "f", "id": 28, "pid": 4183438, "tid": 31367, "ts": 667918129529.674, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 4183438, "tid": 31367, + "ts": 667918129530.903, "dur": 71.102, + "args": { + "External id": 250105,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], [], []], "Ev Idx": 248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 31367, + "ts": 667918129533.167, "dur": 22.368, + "args": { + "External id": 250106,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918129535.986, "dur": 1.870, + "args": { + "External id": 250107,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 31367, + "ts": 667918129538.556, "dur": 16.699, + "args": { + "External id": 250108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 31367, + "ts": 667918129539.176, "dur": 15.737, + "args": { + "External id": 250109,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], []], "Ev Idx": 252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918129556.483, "dur": 3.723, + "args": { + "External id": 250110,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], []], "Ev Idx": 253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918129557.712, "dur": 2.322, + "args": { + "External id": 250111,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "[16777216, 4096, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918129560.815, "dur": 40.483, + "args": { + "External id": 250112,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], [16, 4096, 4, 1024], []], "Ev Idx": 255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918129608.404, "dur": 120.220, + "args": { + "External id": 250113,"Record function id": 0, "Sequence number": 2987715, "Fwd thread id": 1, "Ev Idx": 256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918129609.216, "dur": 97.635, + "args": { + "External id": 250114,"Sequence number": 2987715, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 257 + } + }, + { + "ph": "f", "id": 29, "pid": 4183438, "tid": 31367, "ts": 667918129609.216, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 4183438, "tid": 31367, + "ts": 667918129610.238, "dur": 96.236, + "args": { + "External id": 250115,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], [], []], "Ev Idx": 258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 31367, + "ts": 667918129611.022, "dur": 26.069, + "args": { + "External id": 250116,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918129616.411, "dur": 1.413, + "args": { + "External id": 250117,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 1024]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 31367, + "ts": 667918129622.203, "dur": 14.636, + "args": { + "External id": 250118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 31367, + "ts": 667918129624.905, "dur": 11.567, + "args": { + "External id": 250119,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], []], "Ev Idx": 262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918129638.085, "dur": 2.234, + "args": { + "External id": 250120,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], []], "Ev Idx": 263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918129639.416, "dur": 0.752, + "args": { + "External id": 250121,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "[16777216, 4096, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918129641.092, "dur": 64.274, + "args": { + "External id": 250122,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], [16, 4096, 4, 1024], []], "Ev Idx": 265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 31367, + "ts": 667918129711.928, "dur": 14.622, + "args": { + "External id": 250123,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [16777216, 4096, 1024, 1], []], "Input Dims": [[16, 4096, 4, 1024], [16, 4096, 4, 1024], []], "Ev Idx": 266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918129735.288, "dur": 330.018, + "args": { + "External id": 250124,"Record function id": 0, "Sequence number": 2987714, "Fwd thread id": 1, "Ev Idx": 267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918129736.964, "dur": 319.020, + "args": { + "External id": 250125,"Sequence number": 2987714, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 268 + } + }, + { + "ph": "f", "id": 30, "pid": 4183438, "tid": 31367, "ts": 667918129736.964, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918129898.559, "dur": 40.956, + "args": { + "External id": 250126,"kernel_hash": "cwcxij4wsirwittcv63raib7filq3solcjt7s5agn3kgmuei6diw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "262144", "1024", "1", "1986", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/wc/cwcxij4wsirwittcv63raib7filq3solcjt7s5agn3kgmuei6diw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[262144, 1024], [1024], [262144, 1024], [262144, 1024], [132, 1024], [262144], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 4183438, "tid": 31367, + "ts": 667918129969.012, "dur": 27.510, + "args": { + "External id": 250127,"kernel_hash": "c56gzotufw6tqupgsmxkhpndljpenudz4jbqivnf244rqggxwo5z", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/56/c56gzotufw6tqupgsmxkhpndljpenudz4jbqivnf244rqggxwo5z.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 4183438, "tid": 31367, + "ts": 667918130014.317, "dur": 16.931, + "args": { + "External id": 250128,"kernel_hash": "c44qdv7w6al3deyvs2xnwnza27yveqwslv54u6ulmyjvjqyrds6n", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/44/c44qdv7w6al3deyvs2xnwnza27yveqwslv54u6ulmyjvjqyrds6n.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918130073.967, "dur": 13.066, + "args": { + "External id": 250129,"Record function id": 0, "Ev Idx": 272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918130075.920, "dur": 10.265, + "args": { + "External id": 250130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918130078.890, "dur": 6.514, + "args": { + "External id": 250131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918130082.251, "dur": 3.007, + "args": { + "External id": 250132,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: StackBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918130093.065, "dur": 27.953, + "args": { + "External id": 250133,"Record function id": 0, "Sequence number": 2987713, "Fwd thread id": 1, "Ev Idx": 276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "StackBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918130093.947, "dur": 20.533, + "args": { + "External id": 250134,"Sequence number": 2987713, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1024, 1]], "Input Dims": [[16, 4096, 4, 1024]], "Ev Idx": 277 + } + }, + { + "ph": "f", "id": 31, "pid": 4183438, "tid": 31367, "ts": 667918130093.947, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 4183438, "tid": 31367, + "ts": 667918130096.439, "dur": 7.175, + "args": { + "External id": 250135,"Record function id": 0, "Concrete Inputs": ["", "-2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], []], "Input Dims": [[16, 4096, 4, 1024], [], []], "Ev Idx": 278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130100.811, "dur": 1.193, + "args": { + "External id": 250136,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 4183438, "tid": 31367, + "ts": 667918130104.334, "dur": 4.122, + "args": { + "External id": 250137,"Record function id": 0, "Concrete Inputs": ["", "-2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], []], "Input Dims": [[16, 4096, 4, 1024], [], []], "Ev Idx": 280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130107.191, "dur": 0.509, + "args": { + "External id": 250138,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[16777216, 4096, 1]", "1024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 4183438, "tid": 31367, + "ts": 667918130108.892, "dur": 1.848, + "args": { + "External id": 250139,"Record function id": 0, "Concrete Inputs": ["", "-2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], []], "Input Dims": [[16, 4096, 4, 1024], [], []], "Ev Idx": 282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130109.716, "dur": 0.388, + "args": { + "External id": 250140,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[16777216, 4096, 1]", "2048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 4183438, "tid": 31367, + "ts": 667918130111.343, "dur": 2.421, + "args": { + "External id": 250141,"Record function id": 0, "Concrete Inputs": ["", "-2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], []], "Input Dims": [[16, 4096, 4, 1024], [], []], "Ev Idx": 284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130112.648, "dur": 0.677, + "args": { + "External id": 250142,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[16777216, 4096, 1]", "3072"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918130124.917, "dur": 4.984, + "args": { + "External id": 250143,"Record function id": 0, "Sequence number": 2987712, "Fwd thread id": 1, "Ev Idx": 286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918130125.793, "dur": 1.065, + "args": { + "External id": 250144,"Sequence number": 2987712, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 287 + } + }, + { + "ph": "f", "id": 32, "pid": 4183438, "tid": 31367, "ts": 667918130125.793, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918130134.227, "dur": 504.796, + "args": { + "External id": 250145,"Record function id": 0, "Sequence number": 2987711, "Fwd thread id": 1, "Ev Idx": 288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918130135.612, "dur": 490.913, + "args": { + "External id": 250146,"Sequence number": 2987711, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 289 + } + }, + { + "ph": "f", "id": 33, "pid": 4183438, "tid": 31367, "ts": 667918130135.612, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918130174.686, "dur": 12.479, + "args": { + "External id": 250147,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 4183438, "tid": 31367, + "ts": 667918130182.240, "dur": 4.572, + "args": { + "External id": 250148,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]", "[4096, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[16777216, 4096, 1], [], []], "Input Dims": [[16, 4096, 1024], [], []], "Ev Idx": 291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918130190.855, "dur": 25.137, + "args": { + "External id": 250149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918130208.374, "dur": 6.592, + "args": { + "External id": 250150,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2816, 1], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130212.168, "dur": 2.348, + "args": { + "External id": 250151,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 31367, + "ts": 667918130220.085, "dur": 122.778, + "args": { + "External id": 250152,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [1, 2816], []], "Input Dims": [[65536, 1024], [2816, 1024], []], "Ev Idx": 295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918130221.075, "dur": 3.756, + "args": { + "External id": 250153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2816]], "Input Dims": [[2816, 1024]], "Ev Idx": 296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918130221.917, "dur": 2.117, + "args": { + "External id": 250154,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2816], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130223.436, "dur": 0.461, + "args": { + "External id": 250155,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[2816, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2816], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 31367, + "ts": 667918130226.137, "dur": 115.952, + "args": { + "External id": 250156,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816]], "Ev Idx": 299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918130228.098, "dur": 112.891, + "args": { + "External id": 250157,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816]], "Ev Idx": 300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 31367, + "ts": 667918130349.809, "dur": 4.494, + "args": { + "External id": 250158,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [11534336, 2816, 1]], "Input Dims": [[65536, 2816], [16, 4096, 2816]], "Ev Idx": 301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918130351.627, "dur": 2.553, + "args": { + "External id": 250159,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1], []], "Input Dims": [[65536, 2816], []], "Ev Idx": 302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918130386.508, "dur": 6.093, + "args": { + "External id": 250160,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2816]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918130394.296, "dur": 2.157, + "args": { + "External id": 250161,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2816]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918130397.833, "dur": 2.135, + "args": { + "External id": 250162,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2816]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918130440.359, "dur": 2.678, + "args": { + "External id": 250163,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918130441.618, "dur": 1.278, + "args": { + "External id": 250164,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 4183438, "tid": 31367, + "ts": 667918130471.414, "dur": 133.782, + "args": { + "External id": 250165,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[4096, 1], [2816, 1]], []], "Input Dims": [[], [[65536, 1024], [65536, 2816]], []], "Ev Idx": 308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 31367, + "ts": 667918130479.379, "dur": 9.980, + "args": { + "External id": 250166,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130484.340, "dur": 2.925, + "args": { + "External id": 250167,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024, 1]", "[4096, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918130491.628, "dur": 8.357, + "args": { + "External id": 250168,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1, 1], []], "Input Dims": [[65536, 1024, 1], []], "Ev Idx": 311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130498.493, "dur": 0.669, + "args": { + "External id": 250169,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1, 65536]", "[1, 1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 1], [], [], []], "Input Dims": [[65536, 1024, 1], [], [], []], "Ev Idx": 312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 31367, + "ts": 667918130501.589, "dur": 2.421, + "args": { + "External id": 250170,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], []], "Input Dims": [[65536, 2816], []], "Ev Idx": 313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130503.157, "dur": 0.462, + "args": { + "External id": 250171,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816, 1]", "[2816, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[65536, 2816], [], [], []], "Ev Idx": 314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918130504.925, "dur": 3.087, + "args": { + "External id": 250172,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1, 1], []], "Input Dims": [[65536, 2816, 1], []], "Ev Idx": 315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130507.054, "dur": 0.400, + "args": { + "External id": 250173,"Record function id": 0, "Concrete Inputs": ["", "[1, 2816, 65536]", "[1, 1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1, 1], [], [], []], "Input Dims": [[65536, 2816, 1], [], [], []], "Ev Idx": 316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918130514.220, "dur": 2.841, + "args": { + "External id": 250174,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 4096], []], "Input Dims": [[1024, 1, 65536], []], "Ev Idx": 317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130516.244, "dur": 0.510, + "args": { + "External id": 250175,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536, 1]", "[1, 4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 4096], [], [], []], "Input Dims": [[1024, 1, 65536], [], [], []], "Ev Idx": 318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918130518.282, "dur": 6.321, + "args": { + "External id": 250176,"Record function id": 0, "Concrete Inputs": ["", "[1, 1024, 65536]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 4096, 1], []], "Input Dims": [[1024, 65536, 1], []], "Ev Idx": 319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 4183438, "tid": 31367, + "ts": 667918130522.363, "dur": 2.021, + "args": { + "External id": 250177,"Record function id": 0, "Concrete Inputs": ["", "[1, 1024, 65536]", "[1024, 1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 4096, 1], [], []], "Input Dims": [[1024, 65536, 1], [], []], "Ev Idx": 320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918130525.795, "dur": 2.932, + "args": { + "External id": 250178,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 2816], []], "Input Dims": [[1, 2816, 65536], []], "Ev Idx": 321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130527.784, "dur": 0.634, + "args": { + "External id": 250179,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816, 1]", "[2816, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 2816], [], [], []], "Input Dims": [[1, 2816, 65536], [], [], []], "Ev Idx": 322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918130529.951, "dur": 2.336, + "args": { + "External id": 250180,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1, 1], []], "Input Dims": [[65536, 2816, 1], []], "Ev Idx": 323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918130531.247, "dur": 0.950, + "args": { + "External id": 250181,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1, 1], []], "Input Dims": [[65536, 2816, 1], []], "Ev Idx": 324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918130534.537, "dur": 58.924, + "args": { + "External id": 250182,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1, 4096], [184549376, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816]], "Ev Idx": 325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918130595.425, "dur": 1.016, + "args": { + "External id": 250183,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2883584, 2816, 1], []], "Input Dims": [[1, 1024, 2816], []], "Ev Idx": 326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918130597.359, "dur": 2.992, + "args": { + "External id": 250184,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 2816, 1], []], "Input Dims": [[1024, 1, 2816], []], "Ev Idx": 327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130599.268, "dur": 0.510, + "args": { + "External id": 250185,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816, 1]", "[2816, 1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 2816, 1], [], [], []], "Input Dims": [[1024, 1, 2816], [], [], []], "Ev Idx": 328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918130602.862, "dur": 0.930, + "args": { + "External id": 250186,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1, 2816], []], "Input Dims": [[1024, 2816, 1], []], "Ev Idx": 329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918130693.359, "dur": 13.662, + "args": { + "External id": 250187,"Record function id": 0, "Ev Idx": 330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918130696.943, "dur": 8.962, + "args": { + "External id": 250188,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918130700.311, "dur": 4.081, + "args": { + "External id": 250189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918130701.417, "dur": 2.748, + "args": { + "External id": 250190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918130713.232, "dur": 8.976, + "args": { + "External id": 250191,"Record function id": 0, "Sequence number": 2987710, "Fwd thread id": 1, "Ev Idx": 334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918130714.637, "dur": 4.693, + "args": { + "External id": 250192,"Sequence number": 2987710, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[11534336, 2816, 1]], "Input Dims": [[16, 4096, 2816]], "Ev Idx": 335 + } + }, + { + "ph": "f", "id": 34, "pid": 4183438, "tid": 31367, "ts": 667918130714.637, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918130716.824, "dur": 2.299, + "args": { + "External id": 250193,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918130717.745, "dur": 1.236, + "args": { + "External id": 250194,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918130726.364, "dur": 131.550, + "args": { + "External id": 250195,"Record function id": 0, "Sequence number": 2987709, "Fwd thread id": 1, "Ev Idx": 338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918130729.041, "dur": 121.415, + "args": { + "External id": 250196,"Sequence number": 2987709, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[65536, 2816]], "Ev Idx": 339 + } + }, + { + "ph": "f", "id": 35, "pid": 4183438, "tid": 31367, "ts": 667918130729.041, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918130732.947, "dur": 4.716, + "args": { + "External id": 250197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[65536, 2816]], "Ev Idx": 340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918130734.341, "dur": 2.725, + "args": { + "External id": 250198,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2816, 1], [], []], "Input Dims": [[65536, 2816], [], []], "Ev Idx": 341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130735.938, "dur": 0.924, + "args": { + "External id": 250199,"Record function id": 0, "Concrete Inputs": ["", "[2816, 65536]", "[1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[65536, 2816], [], [], []], "Ev Idx": 342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918130738.950, "dur": 52.086, + "args": { + "External id": 250200,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024]], "Ev Idx": 343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918130792.091, "dur": 5.902, + "args": { + "External id": 250201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918130792.655, "dur": 4.656, + "args": { + "External id": 250202,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130795.997, "dur": 1.123, + "args": { + "External id": 250203,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918130799.639, "dur": 5.564, + "args": { + "External id": 250204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918130800.905, "dur": 3.851, + "args": { + "External id": 250205,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130802.542, "dur": 2.131, + "args": { + "External id": 250206,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918130805.781, "dur": 43.961, + "args": { + "External id": 250207,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024]], "Ev Idx": 350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918130862.606, "dur": 8.267, + "args": { + "External id": 250208,"Record function id": 0, "Sequence number": 2987708, "Fwd thread id": 1, "Ev Idx": 351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918130863.604, "dur": 5.698, + "args": { + "External id": 250209,"Sequence number": 2987708, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 352 + } + }, + { + "ph": "f", "id": 36, "pid": 4183438, "tid": 31367, "ts": 667918130863.604, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918130865.083, "dur": 4.074, + "args": { + "External id": 250210,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918130867.768, "dur": 1.245, + "args": { + "External id": 250211,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918130874.361, "dur": 7.875, + "args": { + "External id": 250212,"Record function id": 0, "Sequence number": 2987707, "Fwd thread id": 1, "Ev Idx": 355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918130875.777, "dur": 4.603, + "args": { + "External id": 250213,"Sequence number": 2987707, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 356 + } + }, + { + "ph": "f", "id": 37, "pid": 4183438, "tid": 31367, "ts": 667918130875.777, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918130876.563, "dur": 3.597, + "args": { + "External id": 250214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918130877.303, "dur": 2.378, + "args": { + "External id": 250215,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130879.113, "dur": 0.469, + "args": { + "External id": 250216,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918130886.361, "dur": 5.697, + "args": { + "External id": 250217,"Record function id": 0, "Ev Idx": 360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918130888.080, "dur": 3.440, + "args": { + "External id": 250218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918130889.081, "dur": 2.167, + "args": { + "External id": 250219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918130889.813, "dur": 1.293, + "args": { + "External id": 250220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918130895.286, "dur": 7.859, + "args": { + "External id": 250221,"Record function id": 0, "Sequence number": 2987706, "Fwd thread id": 1, "Ev Idx": 364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918130896.566, "dur": 4.243, + "args": { + "External id": 250222,"Sequence number": 2987706, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[11534336, 2816, 1]], "Input Dims": [[16, 4096, 2816]], "Ev Idx": 365 + } + }, + { + "ph": "f", "id": 38, "pid": 4183438, "tid": 31367, "ts": 667918130896.566, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918130899.262, "dur": 1.391, + "args": { + "External id": 250223,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918130899.854, "dur": 0.669, + "args": { + "External id": 250224,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918130906.188, "dur": 92.406, + "args": { + "External id": 250225,"Record function id": 0, "Sequence number": 2987705, "Fwd thread id": 1, "Ev Idx": 368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918130906.837, "dur": 84.482, + "args": { + "External id": 250226,"Sequence number": 2987705, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[65536, 2816]], "Ev Idx": 369 + } + }, + { + "ph": "f", "id": 39, "pid": 4183438, "tid": 31367, "ts": 667918130906.837, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918130909.164, "dur": 4.493, + "args": { + "External id": 250227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[65536, 2816]], "Ev Idx": 370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918130909.697, "dur": 3.453, + "args": { + "External id": 250228,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2816, 1], [], []], "Input Dims": [[65536, 2816], [], []], "Ev Idx": 371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130912.644, "dur": 0.368, + "args": { + "External id": 250229,"Record function id": 0, "Concrete Inputs": ["", "[2816, 65536]", "[1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[65536, 2816], [], [], []], "Ev Idx": 372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918130914.178, "dur": 27.644, + "args": { + "External id": 250230,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024]], "Ev Idx": 373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918130942.770, "dur": 5.271, + "args": { + "External id": 250231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918130943.472, "dur": 4.036, + "args": { + "External id": 250232,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130945.276, "dur": 2.120, + "args": { + "External id": 250233,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918130949.084, "dur": 4.252, + "args": { + "External id": 250234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918130949.714, "dur": 3.129, + "args": { + "External id": 250235,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918130952.416, "dur": 0.356, + "args": { + "External id": 250236,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918130953.835, "dur": 36.752, + "args": { + "External id": 250237,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024]], "Ev Idx": 380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918131005.018, "dur": 34.755, + "args": { + "External id": 250238,"Record function id": 0, "Sequence number": 2987704, "Fwd thread id": 1, "Ev Idx": 381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918131005.944, "dur": 4.382, + "args": { + "External id": 250239,"Sequence number": 2987704, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 382 + } + }, + { + "ph": "f", "id": 40, "pid": 4183438, "tid": 31367, "ts": 667918131005.944, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918131007.221, "dur": 2.974, + "args": { + "External id": 250240,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918131008.499, "dur": 1.546, + "args": { + "External id": 250241,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 31367, + "ts": 667918131013.140, "dur": 24.231, + "args": { + "External id": 250242,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918131043.559, "dur": 9.951, + "args": { + "External id": 250243,"Record function id": 0, "Sequence number": 2987703, "Fwd thread id": 1, "Ev Idx": 386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918131044.831, "dur": 6.858, + "args": { + "External id": 250244,"Sequence number": 2987703, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 387 + } + }, + { + "ph": "f", "id": 41, "pid": 4183438, "tid": 31367, "ts": 667918131044.831, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918131047.620, "dur": 3.851, + "args": { + "External id": 250245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918131048.677, "dur": 2.304, + "args": { + "External id": 250246,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918131050.343, "dur": 0.485, + "args": { + "External id": 250247,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918131057.542, "dur": 4.962, + "args": { + "External id": 250248,"Record function id": 0, "Ev Idx": 391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918131059.015, "dur": 3.039, + "args": { + "External id": 250249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918131060.084, "dur": 1.643, + "args": { + "External id": 250250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918131060.548, "dur": 1.072, + "args": { + "External id": 250251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918131066.798, "dur": 450.283, + "args": { + "External id": 250252,"Record function id": 0, "Sequence number": 2987702, "Fwd thread id": 1, "Ev Idx": 395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918131068.188, "dur": 435.455, + "args": { + "External id": 250253,"Sequence number": 2987702, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [16777216, 4096, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 396 + } + }, + { + "ph": "f", "id": 42, "pid": 4183438, "tid": 31367, "ts": 667918131068.188, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 4183438, "tid": 31367, + "ts": 667918131093.775, "dur": 35.167, + "args": { + "External id": 250254,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 31367, + "ts": 667918131095.178, "dur": 33.562, + "args": { + "External id": 250255,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918131098.056, "dur": 6.243, + "args": { + "External id": 250256,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], [], [], []], "Ev Idx": 399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918131101.018, "dur": 2.702, + "args": { + "External id": 250257,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 1024]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918131105.995, "dur": 22.188, + "args": { + "External id": 250258,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918131142.777, "dur": 4.444, + "args": { + "External id": 250259,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918131143.851, "dur": 3.246, + "args": { + "External id": 250260,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918131151.576, "dur": 1.835, + "args": { + "External id": 250261,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918131152.449, "dur": 0.858, + "args": { + "External id": 250262,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918131168.295, "dur": 2.213, + "args": { + "External id": 250263,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918131182.410, "dur": 2.511, + "args": { + "External id": 250264,"Record function id": 0, "Concrete Inputs": ["[132, 1024]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918131389.009, "dur": 3.328, + "args": { + "External id": 250265,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 1024]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[132, 1024], []], "Ev Idx": 408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 31367, + "ts": 667918131397.593, "dur": 35.008, + "args": { + "External id": 250266,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[135168, 1024, 1], [], [], []], "Input Dims": [[1, 132, 1024], [], [], []], "Ev Idx": 409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918131407.557, "dur": 1.004, + "args": { + "External id": 250267,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 1024]", "[1024, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1, 1024], [], [], []], "Ev Idx": 410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918131438.454, "dur": 31.615, + "args": { + "External id": 250268,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[1, 1024], [], [], [], [], []], "Ev Idx": 411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918131440.235, "dur": 29.629, + "args": { + "External id": 250269,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], [], []], "Input Dims": [[1, 1024], [], [], [], [], [], []], "Ev Idx": 412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918131444.958, "dur": 3.872, + "args": { + "External id": 250270,"Record function id": 0, "Concrete Inputs": ["[1, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918131452.270, "dur": 17.053, + "args": { + "External id": 250271,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[1, 1024], [1, 1024], []], "Ev Idx": 414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 31367, + "ts": 667918131474.571, "dur": 2.631, + "args": { + "External id": 250272,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1]], "Input Dims": [[1, 1024], [1024]], "Ev Idx": 415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918131475.968, "dur": 1.079, + "args": { + "External id": 250273,"Record function id": 0, "Concrete Inputs": ["", "[1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[1, 1024], []], "Ev Idx": 416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918131484.474, "dur": 4.304, + "args": { + "External id": 250274,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918131485.906, "dur": 2.763, + "args": { + "External id": 250275,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918131491.427, "dur": 1.833, + "args": { + "External id": 250276,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918131492.432, "dur": 0.721, + "args": { + "External id": 250277,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918131530.355, "dur": 8.866, + "args": { + "External id": 250278,"Record function id": 0, "Ev Idx": 421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918131532.691, "dur": 5.727, + "args": { + "External id": 250279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918131534.762, "dur": 2.694, + "args": { + "External id": 250280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918131535.719, "dur": 1.606, + "args": { + "External id": 250281,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918131543.028, "dur": 7.744, + "args": { + "External id": 250282,"Record function id": 0, "Sequence number": 2987701, "Fwd thread id": 1, "Ev Idx": 425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918131544.551, "dur": 3.632, + "args": { + "External id": 250283,"Sequence number": 2987701, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 426 + } + }, + { + "ph": "f", "id": 43, "pid": 4183438, "tid": 31367, "ts": 667918131544.551, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918131545.973, "dur": 2.024, + "args": { + "External id": 250284,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918131546.884, "dur": 0.970, + "args": { + "External id": 250285,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918131556.303, "dur": 186.533, + "args": { + "External id": 250286,"Record function id": 0, "Sequence number": 2987700, "Fwd thread id": 1, "Ev Idx": 429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918131557.153, "dur": 179.691, + "args": { + "External id": 250287,"Sequence number": 2987700, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 430 + } + }, + { + "ph": "f", "id": 44, "pid": 4183438, "tid": 31367, "ts": 667918131557.153, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918131560.368, "dur": 4.947, + "args": { + "External id": 250288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918131561.729, "dur": 2.997, + "args": { + "External id": 250289,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[65536, 1024], [], []], "Ev Idx": 432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918131563.765, "dur": 0.789, + "args": { + "External id": 250290,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918131566.394, "dur": 70.315, + "args": { + "External id": 250291,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024]], "Ev Idx": 434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918131638.090, "dur": 7.476, + "args": { + "External id": 250292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918131640.954, "dur": 4.006, + "args": { + "External id": 250293,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918131642.540, "dur": 2.207, + "args": { + "External id": 250294,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918131647.106, "dur": 3.223, + "args": { + "External id": 250295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918131648.066, "dur": 1.833, + "args": { + "External id": 250296,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918131649.329, "dur": 0.460, + "args": { + "External id": 250297,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918131690.907, "dur": 44.709, + "args": { + "External id": 250298,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918131749.884, "dur": 12.645, + "args": { + "External id": 250299,"Record function id": 0, "Sequence number": 2987699, "Fwd thread id": 1, "Ev Idx": 442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918131751.357, "dur": 8.772, + "args": { + "External id": 250300,"Sequence number": 2987699, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 443 + } + }, + { + "ph": "f", "id": 45, "pid": 4183438, "tid": 31367, "ts": 667918131751.357, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918131755.141, "dur": 4.832, + "args": { + "External id": 250301,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918131756.059, "dur": 3.802, + "args": { + "External id": 250302,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918131766.054, "dur": 8.522, + "args": { + "External id": 250303,"Record function id": 0, "Sequence number": 2987698, "Fwd thread id": 1, "Ev Idx": 446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918131767.188, "dur": 4.834, + "args": { + "External id": 250304,"Sequence number": 2987698, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 447 + } + }, + { + "ph": "f", "id": 46, "pid": 4183438, "tid": 31367, "ts": 667918131767.188, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918131768.076, "dur": 3.721, + "args": { + "External id": 250305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918131768.836, "dur": 2.468, + "args": { + "External id": 250306,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918131770.677, "dur": 0.504, + "args": { + "External id": 250307,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918131778.640, "dur": 8.607, + "args": { + "External id": 250308,"Record function id": 0, "Ev Idx": 451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918131780.669, "dur": 6.103, + "args": { + "External id": 250309,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918131781.869, "dur": 4.571, + "args": { + "External id": 250310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918131784.618, "dur": 1.696, + "args": { + "External id": 250311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918131790.187, "dur": 7.235, + "args": { + "External id": 250312,"Record function id": 0, "Sequence number": 2987697, "Fwd thread id": 1, "Ev Idx": 455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918131790.956, "dur": 4.480, + "args": { + "External id": 250313,"Sequence number": 2987697, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 456 + } + }, + { + "ph": "f", "id": 47, "pid": 4183438, "tid": 31367, "ts": 667918131790.956, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918131792.679, "dur": 2.597, + "args": { + "External id": 250314,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918131793.887, "dur": 1.231, + "args": { + "External id": 250315,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 4183438, "tid": 31367, + "ts": 667918131802.027, "dur": 301.552, + "args": { + "External id": 250316,"Record function id": 0, "Sequence number": 2987696, "Fwd thread id": 1, "Ev Idx": 459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 4183438, "tid": 31367, + "ts": 667918131803.551, "dur": 283.116, + "args": { + "External id": 250317,"Sequence number": 2987696, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 460 + } + }, + { + "ph": "f", "id": 48, "pid": 4183438, "tid": 31367, "ts": 667918131803.551, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918131821.278, "dur": 7.255, + "args": { + "External id": 250318,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918131824.219, "dur": 3.859, + "args": { + "External id": 250319,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918131830.442, "dur": 3.947, + "args": { + "External id": 250320,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918131832.311, "dur": 1.912, + "args": { + "External id": 250321,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918131835.802, "dur": 5.947, + "args": { + "External id": 250322,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918131837.741, "dur": 3.816, + "args": { + "External id": 250323,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918131873.717, "dur": 187.272, + "args": { + "External id": 250324,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918131963.186, "dur": 3.960, + "args": { + "External id": 250325,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918131969.079, "dur": 3.783, + "args": { + "External id": 250326,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 31367, + "ts": 667918132074.073, "dur": 3.511, + "args": { + "External id": 250327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 31367, + "ts": 667918132080.629, "dur": 0.645, + "args": { + "External id": 250328,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 31367, + "ts": 667918132083.228, "dur": 0.779, + "args": { + "External id": 250329,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918132111.337, "dur": 252.549, + "args": { + "External id": 250330,"Record function id": 0, "Sequence number": 2987695, "Fwd thread id": 1, "Ev Idx": 473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918132113.053, "dur": 242.499, + "args": { + "External id": 250331,"Sequence number": 2987695, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 474 + } + }, + { + "ph": "f", "id": 49, "pid": 4183438, "tid": 31367, "ts": 667918132113.053, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 31367, + "ts": 667918132133.178, "dur": 41.639, + "args": { + "External id": 250332,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918132136.557, "dur": 2.658, + "args": { + "External id": 250333,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918132140.663, "dur": 33.538, + "args": { + "External id": 250334,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], []], "Ev Idx": 477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918132185.348, "dur": 3.847, + "args": { + "External id": 250335,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918132186.781, "dur": 2.107, + "args": { + "External id": 250336,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918132373.161, "dur": 151.597, + "args": { + "External id": 250337,"Record function id": 0, "Sequence number": 2987694, "Fwd thread id": 1, "Ev Idx": 480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918132375.073, "dur": 143.677, + "args": { + "External id": 250338,"Sequence number": 2987694, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 481 + } + }, + { + "ph": "f", "id": 50, "pid": 4183438, "tid": 31367, "ts": 667918132375.073, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 31367, + "ts": 667918132387.360, "dur": 33.530, + "args": { + "External id": 250339,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918132390.496, "dur": 3.184, + "args": { + "External id": 250340,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918132394.866, "dur": 25.468, + "args": { + "External id": 250341,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], []], "Ev Idx": 484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918132429.106, "dur": 6.398, + "args": { + "External id": 250342,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918132430.966, "dur": 4.192, + "args": { + "External id": 250343,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132531.194, "dur": 15.174, + "args": { + "External id": 250344,"Record function id": 0, "Sequence number": 2987693, "Fwd thread id": 1, "Ev Idx": 487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132532.903, "dur": 10.707, + "args": { + "External id": 250345,"Sequence number": 2987693, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 488 + } + }, + { + "ph": "f", "id": 51, "pid": 4183438, "tid": 31367, "ts": 667918132532.903, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918132535.284, "dur": 8.047, + "args": { + "External id": 250346,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918132537.034, "dur": 6.096, + "args": { + "External id": 250347,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132549.731, "dur": 6.552, + "args": { + "External id": 250348,"Record function id": 0, "Sequence number": 2987692, "Fwd thread id": 1, "Ev Idx": 491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132550.843, "dur": 3.896, + "args": { + "External id": 250349,"Sequence number": 2987692, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 492 + } + }, + { + "ph": "f", "id": 52, "pid": 4183438, "tid": 31367, "ts": 667918132550.843, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918132552.165, "dur": 2.445, + "args": { + "External id": 250350,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918132553.553, "dur": 0.901, + "args": { + "External id": 250351,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132559.209, "dur": 7.423, + "args": { + "External id": 250352,"Record function id": 0, "Sequence number": 2987691, "Fwd thread id": 1, "Ev Idx": 495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132561.921, "dur": 3.534, + "args": { + "External id": 250353,"Sequence number": 2987691, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 496 + } + }, + { + "ph": "f", "id": 53, "pid": 4183438, "tid": 31367, "ts": 667918132561.921, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918132563.413, "dur": 1.908, + "args": { + "External id": 250354,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918132564.394, "dur": 0.788, + "args": { + "External id": 250355,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132570.059, "dur": 6.575, + "args": { + "External id": 250356,"Record function id": 0, "Sequence number": 2987690, "Fwd thread id": 1, "Ev Idx": 499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132571.276, "dur": 3.011, + "args": { + "External id": 250357,"Sequence number": 2987690, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 500 + } + }, + { + "ph": "f", "id": 54, "pid": 4183438, "tid": 31367, "ts": 667918132571.276, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918132572.391, "dur": 1.754, + "args": { + "External id": 250358,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918132573.227, "dur": 0.825, + "args": { + "External id": 250359,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132580.051, "dur": 238.285, + "args": { + "External id": 250360,"Record function id": 0, "Sequence number": 2987689, "Fwd thread id": 1, "Ev Idx": 503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132580.767, "dur": 227.146, + "args": { + "External id": 250361,"Sequence number": 2987689, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 504 + } + }, + { + "ph": "f", "id": 55, "pid": 4183438, "tid": 31367, "ts": 667918132580.767, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918132586.291, "dur": 6.287, + "args": { + "External id": 250362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918132588.127, "dur": 3.820, + "args": { + "External id": 250363,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[65536, 1024], [], []], "Ev Idx": 506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918132590.215, "dur": 1.445, + "args": { + "External id": 250364,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918132594.160, "dur": 124.596, + "args": { + "External id": 250365,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024]], "Ev Idx": 508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918132721.816, "dur": 8.028, + "args": { + "External id": 250366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918132723.149, "dur": 5.570, + "args": { + "External id": 250367,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918132727.219, "dur": 1.306, + "args": { + "External id": 250368,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918132754.324, "dur": 3.879, + "args": { + "External id": 250369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918132755.312, "dur": 2.336, + "args": { + "External id": 250370,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918132757.123, "dur": 0.439, + "args": { + "External id": 250371,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918132759.043, "dur": 47.991, + "args": { + "External id": 250372,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132826.205, "dur": 7.930, + "args": { + "External id": 250373,"Record function id": 0, "Sequence number": 2987688, "Fwd thread id": 1, "Ev Idx": 516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132827.171, "dur": 5.013, + "args": { + "External id": 250374,"Sequence number": 2987688, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 517 + } + }, + { + "ph": "f", "id": 56, "pid": 4183438, "tid": 31367, "ts": 667918132827.171, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918132828.765, "dur": 3.268, + "args": { + "External id": 250375,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918132830.242, "dur": 1.642, + "args": { + "External id": 250376,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132837.374, "dur": 9.405, + "args": { + "External id": 250377,"Record function id": 0, "Sequence number": 2987687, "Fwd thread id": 1, "Ev Idx": 520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132840.412, "dur": 4.235, + "args": { + "External id": 250378,"Sequence number": 2987687, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 521 + } + }, + { + "ph": "f", "id": 57, "pid": 4183438, "tid": 31367, "ts": 667918132840.412, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918132841.194, "dur": 3.217, + "args": { + "External id": 250379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918132841.977, "dur": 1.913, + "args": { + "External id": 250380,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918132843.443, "dur": 0.313, + "args": { + "External id": 250381,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918132852.371, "dur": 9.707, + "args": { + "External id": 250382,"Record function id": 0, "Ev Idx": 525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918132854.311, "dur": 7.112, + "args": { + "External id": 250383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918132857.283, "dur": 3.730, + "args": { + "External id": 250384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918132858.404, "dur": 2.505, + "args": { + "External id": 250385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132867.195, "dur": 7.800, + "args": { + "External id": 250386,"Record function id": 0, "Sequence number": 2987686, "Fwd thread id": 1, "Ev Idx": 529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132868.433, "dur": 4.345, + "args": { + "External id": 250387,"Sequence number": 2987686, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 530 + } + }, + { + "ph": "f", "id": 58, "pid": 4183438, "tid": 31367, "ts": 667918132868.433, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918132869.663, "dur": 2.969, + "args": { + "External id": 250388,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918132871.717, "dur": 0.784, + "args": { + "External id": 250389,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132878.394, "dur": 96.202, + "args": { + "External id": 250390,"Record function id": 0, "Sequence number": 2987685, "Fwd thread id": 1, "Ev Idx": 533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132879.031, "dur": 90.584, + "args": { + "External id": 250391,"Sequence number": 2987685, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 534 + } + }, + { + "ph": "f", "id": 59, "pid": 4183438, "tid": 31367, "ts": 667918132879.031, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918132880.782, "dur": 2.534, + "args": { + "External id": 250392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918132881.198, "dur": 1.686, + "args": { + "External id": 250393,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[65536, 1024], [], []], "Ev Idx": 536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918132882.407, "dur": 0.374, + "args": { + "External id": 250394,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918132885.611, "dur": 36.885, + "args": { + "External id": 250395,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024]], "Ev Idx": 538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918132923.742, "dur": 3.287, + "args": { + "External id": 250396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918132924.260, "dur": 2.273, + "args": { + "External id": 250397,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918132925.831, "dur": 0.592, + "args": { + "External id": 250398,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918132928.018, "dur": 4.662, + "args": { + "External id": 250399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918132928.585, "dur": 3.596, + "args": { + "External id": 250400,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918132929.829, "dur": 2.284, + "args": { + "External id": 250401,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918132935.429, "dur": 33.523, + "args": { + "External id": 250402,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132978.793, "dur": 31.929, + "args": { + "External id": 250403,"Record function id": 0, "Sequence number": 2987684, "Fwd thread id": 1, "Ev Idx": 546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918132979.678, "dur": 3.612, + "args": { + "External id": 250404,"Sequence number": 2987684, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 547 + } + }, + { + "ph": "f", "id": 60, "pid": 4183438, "tid": 31367, "ts": 667918132979.678, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918132981.065, "dur": 2.089, + "args": { + "External id": 250405,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918132981.967, "dur": 1.039, + "args": { + "External id": 250406,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 31367, + "ts": 667918132986.256, "dur": 22.512, + "args": { + "External id": 250407,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918133014.361, "dur": 9.312, + "args": { + "External id": 250408,"Record function id": 0, "Sequence number": 2987683, "Fwd thread id": 1, "Ev Idx": 551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918133015.214, "dur": 6.437, + "args": { + "External id": 250409,"Sequence number": 2987683, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 552 + } + }, + { + "ph": "f", "id": 61, "pid": 4183438, "tid": 31367, "ts": 667918133015.214, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918133015.852, "dur": 5.613, + "args": { + "External id": 250410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918133018.547, "dur": 2.391, + "args": { + "External id": 250411,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918133020.301, "dur": 0.544, + "args": { + "External id": 250412,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918133027.814, "dur": 5.460, + "args": { + "External id": 250413,"Record function id": 0, "Ev Idx": 556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918133029.503, "dur": 3.263, + "args": { + "External id": 250414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918133030.344, "dur": 2.123, + "args": { + "External id": 250415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918133031.103, "dur": 1.257, + "args": { + "External id": 250416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918133036.624, "dur": 6.693, + "args": { + "External id": 250417,"Record function id": 0, "Sequence number": 2987682, "Fwd thread id": 1, "Ev Idx": 560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918133038.149, "dur": 2.978, + "args": { + "External id": 250418,"Sequence number": 2987682, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 561 + } + }, + { + "ph": "f", "id": 62, "pid": 4183438, "tid": 31367, "ts": 667918133038.149, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918133039.301, "dur": 1.694, + "args": { + "External id": 250419,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918133040.091, "dur": 0.799, + "args": { + "External id": 250420,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918133048.233, "dur": 91.455, + "args": { + "External id": 250421,"Record function id": 0, "Sequence number": 2987681, "Fwd thread id": 1, "Ev Idx": 564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918133049.054, "dur": 83.765, + "args": { + "External id": 250422,"Sequence number": 2987681, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 565 + } + }, + { + "ph": "f", "id": 63, "pid": 4183438, "tid": 31367, "ts": 667918133049.054, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918133051.192, "dur": 2.780, + "args": { + "External id": 250423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918133051.608, "dur": 1.936, + "args": { + "External id": 250424,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[65536, 1024], [], []], "Ev Idx": 567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918133052.943, "dur": 0.459, + "args": { + "External id": 250425,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918133054.591, "dur": 35.571, + "args": { + "External id": 250426,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024]], "Ev Idx": 569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918133091.422, "dur": 5.456, + "args": { + "External id": 250427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918133093.909, "dur": 2.318, + "args": { + "External id": 250428,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918133095.437, "dur": 0.665, + "args": { + "External id": 250429,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918133098.195, "dur": 2.862, + "args": { + "External id": 250430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918133098.805, "dur": 1.824, + "args": { + "External id": 250431,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918133100.060, "dur": 0.500, + "args": { + "External id": 250432,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918133101.560, "dur": 30.637, + "args": { + "External id": 250433,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918133143.810, "dur": 24.568, + "args": { + "External id": 250434,"Record function id": 0, "Sequence number": 2987680, "Fwd thread id": 1, "Ev Idx": 577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918133146.437, "dur": 3.758, + "args": { + "External id": 250435,"Sequence number": 2987680, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 578 + } + }, + { + "ph": "f", "id": 64, "pid": 4183438, "tid": 31367, "ts": 667918133146.437, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918133147.932, "dur": 2.127, + "args": { + "External id": 250436,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918133148.803, "dur": 1.163, + "args": { + "External id": 250437,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 31367, + "ts": 667918133152.489, "dur": 14.028, + "args": { + "External id": 250438,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918133171.951, "dur": 10.855, + "args": { + "External id": 250439,"Record function id": 0, "Sequence number": 2987679, "Fwd thread id": 1, "Ev Idx": 582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918133172.789, "dur": 8.004, + "args": { + "External id": 250440,"Sequence number": 2987679, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 583 + } + }, + { + "ph": "f", "id": 65, "pid": 4183438, "tid": 31367, "ts": 667918133172.789, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918133173.333, "dur": 7.278, + "args": { + "External id": 250441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918133175.781, "dur": 4.296, + "args": { + "External id": 250442,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918133179.375, "dur": 0.574, + "args": { + "External id": 250443,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918133186.935, "dur": 4.671, + "args": { + "External id": 250444,"Record function id": 0, "Ev Idx": 587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918133188.421, "dur": 2.750, + "args": { + "External id": 250445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918133189.304, "dur": 1.403, + "args": { + "External id": 250446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918133189.620, "dur": 0.976, + "args": { + "External id": 250447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918133215.225, "dur": 375.787, + "args": { + "External id": 250448,"Record function id": 0, "Sequence number": 2987678, "Fwd thread id": 1, "Ev Idx": 591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918133217.152, "dur": 340.413, + "args": { + "External id": 250449,"Sequence number": 2987678, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 592 + } + }, + { + "ph": "f", "id": 66, "pid": 4183438, "tid": 31367, "ts": 667918133217.152, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918133253.510, "dur": 2.502, + "args": { + "External id": 250450,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918133254.269, "dur": 1.580, + "args": { + "External id": 250451,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918133272.201, "dur": 6.555, + "args": { + "External id": 250452,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918133289.869, "dur": 4.631, + "args": { + "External id": 250453,"Record function id": 0, "Concrete Inputs": ["[132, 1024]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918133452.896, "dur": 1.697, + "args": { + "External id": 250454,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 1024]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[132, 1024], []], "Ev Idx": 597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 31367, + "ts": 667918133458.808, "dur": 34.360, + "args": { + "External id": 250455,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[135168, 1024, 1], [], [], []], "Input Dims": [[1, 132, 1024], [], [], []], "Ev Idx": 598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918133469.297, "dur": 0.955, + "args": { + "External id": 250456,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 1024]", "[1024, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1, 1024], [], [], []], "Ev Idx": 599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918133498.595, "dur": 35.256, + "args": { + "External id": 250457,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[1, 1024], [], [], [], [], []], "Ev Idx": 600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918133500.180, "dur": 33.471, + "args": { + "External id": 250458,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], [], []], "Input Dims": [[1, 1024], [], [], [], [], [], []], "Ev Idx": 601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918133504.480, "dur": 3.493, + "args": { + "External id": 250459,"Record function id": 0, "Concrete Inputs": ["[1, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918133509.792, "dur": 23.283, + "args": { + "External id": 250460,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[1, 1024], [1, 1024], []], "Ev Idx": 603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 31367, + "ts": 667918133538.383, "dur": 4.185, + "args": { + "External id": 250461,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1]], "Input Dims": [[1, 1024], [1024]], "Ev Idx": 604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918133541.376, "dur": 1.054, + "args": { + "External id": 250462,"Record function id": 0, "Concrete Inputs": ["", "[1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[1, 1024], []], "Ev Idx": 605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918133548.208, "dur": 2.382, + "args": { + "External id": 250463,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918133549.663, "dur": 0.833, + "args": { + "External id": 250464,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 31367, + "ts": 667918133568.334, "dur": 16.713, + "args": { + "External id": 250465,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918133600.743, "dur": 8.349, + "args": { + "External id": 250466,"Record function id": 0, "Ev Idx": 609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918133602.916, "dur": 5.454, + "args": { + "External id": 250467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918133604.913, "dur": 2.580, + "args": { + "External id": 250468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918133605.752, "dur": 1.628, + "args": { + "External id": 250469,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918133614.612, "dur": 5.380, + "args": { + "External id": 250470,"Record function id": 0, "Sequence number": 2987677, "Fwd thread id": 1, "Ev Idx": 613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918133615.902, "dur": 1.158, + "args": { + "External id": 250471,"Sequence number": 2987677, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 614 + } + }, + { + "ph": "f", "id": 67, "pid": 4183438, "tid": 31367, "ts": 667918133615.902, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918133623.591, "dur": 457.968, + "args": { + "External id": 250472,"Record function id": 0, "Sequence number": 2987676, "Fwd thread id": 1, "Ev Idx": 615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918133624.986, "dur": 445.446, + "args": { + "External id": 250473,"Sequence number": 2987676, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 616 + } + }, + { + "ph": "f", "id": 68, "pid": 4183438, "tid": 31367, "ts": 667918133624.986, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918133696.644, "dur": 10.151, + "args": { + "External id": 250474,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 4183438, "tid": 31367, + "ts": 667918133702.464, "dur": 3.957, + "args": { + "External id": 250475,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]", "[4096, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[16777216, 4096, 1], [], []], "Input Dims": [[16, 4096, 1024], [], []], "Ev Idx": 618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918133710.419, "dur": 7.180, + "args": { + "External id": 250476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918133711.948, "dur": 5.016, + "args": { + "External id": 250477,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2816, 1], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918133713.932, "dur": 2.876, + "args": { + "External id": 250478,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 31367, + "ts": 667918133722.672, "dur": 103.228, + "args": { + "External id": 250479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [1, 2816], []], "Input Dims": [[65536, 1024], [2816, 1024], []], "Ev Idx": 622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918133723.691, "dur": 2.826, + "args": { + "External id": 250480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2816]], "Input Dims": [[2816, 1024]], "Ev Idx": 623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918133724.208, "dur": 1.840, + "args": { + "External id": 250481,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2816], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918133725.562, "dur": 0.415, + "args": { + "External id": 250482,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[2816, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2816], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 31367, + "ts": 667918133727.678, "dur": 97.493, + "args": { + "External id": 250483,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816]], "Ev Idx": 626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918133729.135, "dur": 95.101, + "args": { + "External id": 250484,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816]], "Ev Idx": 627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 31367, + "ts": 667918133829.719, "dur": 4.972, + "args": { + "External id": 250485,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [11534336, 2816, 1]], "Input Dims": [[65536, 2816], [16, 4096, 2816]], "Ev Idx": 628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918133833.211, "dur": 1.385, + "args": { + "External id": 250486,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1], []], "Input Dims": [[65536, 2816], []], "Ev Idx": 629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918133868.290, "dur": 5.851, + "args": { + "External id": 250487,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2816]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918133875.434, "dur": 1.817, + "args": { + "External id": 250488,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2816]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918133878.550, "dur": 1.802, + "args": { + "External id": 250489,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2816]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918133913.546, "dur": 2.233, + "args": { + "External id": 250490,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918133914.725, "dur": 0.912, + "args": { + "External id": 250491,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 4183438, "tid": 31367, + "ts": 667918133938.607, "dur": 112.043, + "args": { + "External id": 250492,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[4096, 1], [2816, 1]], []], "Input Dims": [[], [[65536, 1024], [65536, 2816]], []], "Ev Idx": 635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 31367, + "ts": 667918133943.926, "dur": 6.576, + "args": { + "External id": 250493,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918133947.454, "dur": 2.268, + "args": { + "External id": 250494,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024, 1]", "[4096, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918133952.477, "dur": 6.374, + "args": { + "External id": 250495,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1, 1], []], "Input Dims": [[65536, 1024, 1], []], "Ev Idx": 638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918133957.507, "dur": 0.657, + "args": { + "External id": 250496,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1, 65536]", "[1, 1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 1], [], [], []], "Input Dims": [[65536, 1024, 1], [], [], []], "Ev Idx": 639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 31367, + "ts": 667918133960.139, "dur": 3.078, + "args": { + "External id": 250497,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], []], "Input Dims": [[65536, 2816], []], "Ev Idx": 640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918133962.453, "dur": 0.436, + "args": { + "External id": 250498,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816, 1]", "[2816, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[65536, 2816], [], [], []], "Ev Idx": 641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918133964.457, "dur": 3.083, + "args": { + "External id": 250499,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1, 1], []], "Input Dims": [[65536, 2816, 1], []], "Ev Idx": 642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918133966.646, "dur": 0.469, + "args": { + "External id": 250500,"Record function id": 0, "Concrete Inputs": ["", "[1, 2816, 65536]", "[1, 1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1, 1], [], [], []], "Input Dims": [[65536, 2816, 1], [], [], []], "Ev Idx": 643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918133971.365, "dur": 3.496, + "args": { + "External id": 250501,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 4096], []], "Input Dims": [[1024, 1, 65536], []], "Ev Idx": 644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918133973.824, "dur": 0.717, + "args": { + "External id": 250502,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536, 1]", "[1, 4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 4096], [], [], []], "Input Dims": [[1024, 1, 65536], [], [], []], "Ev Idx": 645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918133977.514, "dur": 5.289, + "args": { + "External id": 250503,"Record function id": 0, "Concrete Inputs": ["", "[1, 1024, 65536]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 4096, 1], []], "Input Dims": [[1024, 65536, 1], []], "Ev Idx": 646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 4183438, "tid": 31367, + "ts": 667918133980.961, "dur": 1.649, + "args": { + "External id": 250504,"Record function id": 0, "Concrete Inputs": ["", "[1, 1024, 65536]", "[1024, 1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 4096, 1], [], []], "Input Dims": [[1024, 65536, 1], [], []], "Ev Idx": 647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918133984.014, "dur": 2.382, + "args": { + "External id": 250505,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 2816], []], "Input Dims": [[1, 2816, 65536], []], "Ev Idx": 648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918133985.777, "dur": 0.311, + "args": { + "External id": 250506,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816, 1]", "[2816, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 2816], [], [], []], "Input Dims": [[1, 2816, 65536], [], [], []], "Ev Idx": 649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918133987.465, "dur": 2.975, + "args": { + "External id": 250507,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1, 1], []], "Input Dims": [[65536, 2816, 1], []], "Ev Idx": 650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918133988.493, "dur": 1.855, + "args": { + "External id": 250508,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1, 1], []], "Input Dims": [[65536, 2816, 1], []], "Ev Idx": 651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918133991.840, "dur": 47.929, + "args": { + "External id": 250509,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1, 4096], [184549376, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816]], "Ev Idx": 652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918134041.817, "dur": 1.013, + "args": { + "External id": 250510,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2883584, 2816, 1], []], "Input Dims": [[1, 1024, 2816], []], "Ev Idx": 653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918134043.766, "dur": 2.851, + "args": { + "External id": 250511,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 2816, 1], []], "Input Dims": [[1024, 1, 2816], []], "Ev Idx": 654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918134045.688, "dur": 0.383, + "args": { + "External id": 250512,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816, 1]", "[2816, 1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 2816, 1], [], [], []], "Input Dims": [[1024, 1, 2816], [], [], []], "Ev Idx": 655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918134048.906, "dur": 0.696, + "args": { + "External id": 250513,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1, 2816], []], "Input Dims": [[1024, 2816, 1], []], "Ev Idx": 656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918134092.200, "dur": 8.952, + "args": { + "External id": 250514,"Record function id": 0, "Ev Idx": 657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918134094.367, "dur": 6.044, + "args": { + "External id": 250515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918134096.424, "dur": 3.125, + "args": { + "External id": 250516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918134097.471, "dur": 1.939, + "args": { + "External id": 250517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134104.657, "dur": 6.592, + "args": { + "External id": 250518,"Record function id": 0, "Sequence number": 2987675, "Fwd thread id": 1, "Ev Idx": 661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134105.867, "dur": 3.509, + "args": { + "External id": 250519,"Sequence number": 2987675, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[11534336, 2816, 1]], "Input Dims": [[16, 4096, 2816]], "Ev Idx": 662 + } + }, + { + "ph": "f", "id": 69, "pid": 4183438, "tid": 31367, "ts": 667918134105.867, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918134107.373, "dur": 1.832, + "args": { + "External id": 250520,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918134108.278, "dur": 0.762, + "args": { + "External id": 250521,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134114.675, "dur": 122.338, + "args": { + "External id": 250522,"Record function id": 0, "Sequence number": 2987674, "Fwd thread id": 1, "Ev Idx": 665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134115.410, "dur": 114.029, + "args": { + "External id": 250523,"Sequence number": 2987674, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[65536, 2816]], "Ev Idx": 666 + } + }, + { + "ph": "f", "id": 70, "pid": 4183438, "tid": 31367, "ts": 667918134115.410, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918134120.159, "dur": 4.070, + "args": { + "External id": 250524,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[65536, 2816]], "Ev Idx": 667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918134121.025, "dur": 2.714, + "args": { + "External id": 250525,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2816, 1], [], []], "Input Dims": [[65536, 2816], [], []], "Ev Idx": 668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918134123.189, "dur": 0.421, + "args": { + "External id": 250526,"Record function id": 0, "Concrete Inputs": ["", "[2816, 65536]", "[1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[65536, 2816], [], [], []], "Ev Idx": 669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918134125.191, "dur": 31.922, + "args": { + "External id": 250527,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024]], "Ev Idx": 670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918134158.280, "dur": 5.644, + "args": { + "External id": 250528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918134159.013, "dur": 4.357, + "args": { + "External id": 250529,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918134162.302, "dur": 0.892, + "args": { + "External id": 250530,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918134165.155, "dur": 5.940, + "args": { + "External id": 250531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918134166.329, "dur": 4.318, + "args": { + "External id": 250532,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918134168.059, "dur": 2.512, + "args": { + "External id": 250533,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918134171.637, "dur": 56.472, + "args": { + "External id": 250534,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024]], "Ev Idx": 677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134243.628, "dur": 6.745, + "args": { + "External id": 250535,"Record function id": 0, "Sequence number": 2987673, "Fwd thread id": 1, "Ev Idx": 678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134244.729, "dur": 4.468, + "args": { + "External id": 250536,"Sequence number": 2987673, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 679 + } + }, + { + "ph": "f", "id": 71, "pid": 4183438, "tid": 31367, "ts": 667918134244.729, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918134246.394, "dur": 2.649, + "args": { + "External id": 250537,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918134247.361, "dur": 1.530, + "args": { + "External id": 250538,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134253.949, "dur": 9.435, + "args": { + "External id": 250539,"Record function id": 0, "Sequence number": 2987672, "Fwd thread id": 1, "Ev Idx": 682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134256.712, "dur": 5.017, + "args": { + "External id": 250540,"Sequence number": 2987672, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 683 + } + }, + { + "ph": "f", "id": 72, "pid": 4183438, "tid": 31367, "ts": 667918134256.712, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918134257.634, "dur": 3.871, + "args": { + "External id": 250541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918134258.720, "dur": 2.311, + "args": { + "External id": 250542,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918134260.348, "dur": 0.595, + "args": { + "External id": 250543,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918134267.322, "dur": 5.651, + "args": { + "External id": 250544,"Record function id": 0, "Ev Idx": 687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918134268.917, "dur": 3.619, + "args": { + "External id": 250545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918134270.318, "dur": 1.958, + "args": { + "External id": 250546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918134270.868, "dur": 1.316, + "args": { + "External id": 250547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134276.226, "dur": 7.652, + "args": { + "External id": 250548,"Record function id": 0, "Sequence number": 2987671, "Fwd thread id": 1, "Ev Idx": 691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134277.577, "dur": 4.224, + "args": { + "External id": 250549,"Sequence number": 2987671, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[11534336, 2816, 1]], "Input Dims": [[16, 4096, 2816]], "Ev Idx": 692 + } + }, + { + "ph": "f", "id": 73, "pid": 4183438, "tid": 31367, "ts": 667918134277.577, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918134278.566, "dur": 3.080, + "args": { + "External id": 250550,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918134280.853, "dur": 0.641, + "args": { + "External id": 250551,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134287.183, "dur": 95.512, + "args": { + "External id": 250552,"Record function id": 0, "Sequence number": 2987670, "Fwd thread id": 1, "Ev Idx": 695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134288.047, "dur": 86.807, + "args": { + "External id": 250553,"Sequence number": 2987670, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[65536, 2816]], "Ev Idx": 696 + } + }, + { + "ph": "f", "id": 74, "pid": 4183438, "tid": 31367, "ts": 667918134288.047, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918134290.390, "dur": 5.575, + "args": { + "External id": 250554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[65536, 2816]], "Ev Idx": 697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918134290.943, "dur": 4.543, + "args": { + "External id": 250555,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2816, 1], [], []], "Input Dims": [[65536, 2816], [], []], "Ev Idx": 698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918134294.897, "dur": 0.443, + "args": { + "External id": 250556,"Record function id": 0, "Concrete Inputs": ["", "[2816, 65536]", "[1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[65536, 2816], [], [], []], "Ev Idx": 699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918134298.285, "dur": 30.112, + "args": { + "External id": 250557,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024]], "Ev Idx": 700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918134329.308, "dur": 7.123, + "args": { + "External id": 250558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918134329.873, "dur": 6.023, + "args": { + "External id": 250559,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918134331.676, "dur": 4.057, + "args": { + "External id": 250560,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918134337.501, "dur": 4.384, + "args": { + "External id": 250561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918134338.178, "dur": 3.206, + "args": { + "External id": 250562,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918134340.969, "dur": 0.351, + "args": { + "External id": 250563,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918134342.328, "dur": 31.693, + "args": { + "External id": 250564,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024]], "Ev Idx": 707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134387.022, "dur": 27.977, + "args": { + "External id": 250565,"Record function id": 0, "Sequence number": 2987669, "Fwd thread id": 1, "Ev Idx": 708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134387.915, "dur": 3.801, + "args": { + "External id": 250566,"Sequence number": 2987669, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 709 + } + }, + { + "ph": "f", "id": 75, "pid": 4183438, "tid": 31367, "ts": 667918134387.915, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918134389.332, "dur": 2.253, + "args": { + "External id": 250567,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918134390.450, "dur": 0.990, + "args": { + "External id": 250568,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 31367, + "ts": 667918134394.134, "dur": 18.632, + "args": { + "External id": 250569,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134418.814, "dur": 9.234, + "args": { + "External id": 250570,"Record function id": 0, "Sequence number": 2987668, "Fwd thread id": 1, "Ev Idx": 713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134419.832, "dur": 5.988, + "args": { + "External id": 250571,"Sequence number": 2987668, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 714 + } + }, + { + "ph": "f", "id": 76, "pid": 4183438, "tid": 31367, "ts": 667918134419.832, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918134420.584, "dur": 4.973, + "args": { + "External id": 250572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918134421.531, "dur": 3.500, + "args": { + "External id": 250573,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918134424.380, "dur": 0.565, + "args": { + "External id": 250574,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918134431.786, "dur": 4.769, + "args": { + "External id": 250575,"Record function id": 0, "Ev Idx": 718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918134433.129, "dur": 3.023, + "args": { + "External id": 250576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918134433.982, "dur": 1.900, + "args": { + "External id": 250577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918134434.477, "dur": 1.332, + "args": { + "External id": 250578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918134442.717, "dur": 461.710, + "args": { + "External id": 250579,"Record function id": 0, "Sequence number": 2987667, "Fwd thread id": 1, "Ev Idx": 722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918134444.227, "dur": 426.809, + "args": { + "External id": 250580,"Sequence number": 2987667, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [16777216, 4096, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 723 + } + }, + { + "ph": "f", "id": 77, "pid": 4183438, "tid": 31367, "ts": 667918134444.227, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 4183438, "tid": 31367, + "ts": 667918134466.464, "dur": 33.062, + "args": { + "External id": 250581,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 31367, + "ts": 667918134467.716, "dur": 31.621, + "args": { + "External id": 250582,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918134470.748, "dur": 5.678, + "args": { + "External id": 250583,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], [], [], []], "Ev Idx": 726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918134473.195, "dur": 2.727, + "args": { + "External id": 250584,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 1024]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918134477.749, "dur": 21.087, + "args": { + "External id": 250585,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918134510.659, "dur": 5.382, + "args": { + "External id": 250586,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918134513.153, "dur": 2.746, + "args": { + "External id": 250587,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918134520.188, "dur": 1.422, + "args": { + "External id": 250588,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918134520.752, "dur": 0.769, + "args": { + "External id": 250589,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918134533.077, "dur": 2.021, + "args": { + "External id": 250590,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918134545.912, "dur": 2.607, + "args": { + "External id": 250591,"Record function id": 0, "Concrete Inputs": ["[132, 1024]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918134754.322, "dur": 3.564, + "args": { + "External id": 250592,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 1024]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[132, 1024], []], "Ev Idx": 735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 31367, + "ts": 667918134762.883, "dur": 37.804, + "args": { + "External id": 250593,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[135168, 1024, 1], [], [], []], "Input Dims": [[1, 132, 1024], [], [], []], "Ev Idx": 736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918134773.542, "dur": 0.853, + "args": { + "External id": 250594,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 1024]", "[1024, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1, 1024], [], [], []], "Ev Idx": 737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918134806.200, "dur": 29.376, + "args": { + "External id": 250595,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[1, 1024], [], [], [], [], []], "Ev Idx": 738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918134808.288, "dur": 27.092, + "args": { + "External id": 250596,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], [], []], "Input Dims": [[1, 1024], [], [], [], [], [], []], "Ev Idx": 739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918134812.630, "dur": 4.383, + "args": { + "External id": 250597,"Record function id": 0, "Concrete Inputs": ["[1, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918134818.730, "dur": 16.074, + "args": { + "External id": 250598,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[1, 1024], [1, 1024], []], "Ev Idx": 741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 31367, + "ts": 667918134839.850, "dur": 4.557, + "args": { + "External id": 250599,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1]], "Input Dims": [[1, 1024], [1024]], "Ev Idx": 742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918134843.206, "dur": 1.095, + "args": { + "External id": 250600,"Record function id": 0, "Concrete Inputs": ["", "[1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[1, 1024], []], "Ev Idx": 743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918134851.521, "dur": 5.277, + "args": { + "External id": 250601,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918134853.250, "dur": 3.424, + "args": { + "External id": 250602,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918134859.267, "dur": 1.916, + "args": { + "External id": 250603,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918134860.283, "dur": 0.788, + "args": { + "External id": 250604,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 31367, + "ts": 667918134886.221, "dur": 16.607, + "args": { + "External id": 250605,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918134919.370, "dur": 11.275, + "args": { + "External id": 250606,"Record function id": 0, "Ev Idx": 749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918134921.754, "dur": 8.135, + "args": { + "External id": 250607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918134924.039, "dur": 4.770, + "args": { + "External id": 250608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918134926.615, "dur": 2.055, + "args": { + "External id": 250609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134934.635, "dur": 7.898, + "args": { + "External id": 250610,"Record function id": 0, "Sequence number": 2987666, "Fwd thread id": 1, "Ev Idx": 753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134936.212, "dur": 3.765, + "args": { + "External id": 250611,"Sequence number": 2987666, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 754 + } + }, + { + "ph": "f", "id": 78, "pid": 4183438, "tid": 31367, "ts": 667918134936.212, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918134937.766, "dur": 1.969, + "args": { + "External id": 250612,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918134938.393, "dur": 1.198, + "args": { + "External id": 250613,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134945.868, "dur": 144.053, + "args": { + "External id": 250614,"Record function id": 0, "Sequence number": 2987665, "Fwd thread id": 1, "Ev Idx": 757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918134946.744, "dur": 136.349, + "args": { + "External id": 250615,"Sequence number": 2987665, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 758 + } + }, + { + "ph": "f", "id": 79, "pid": 4183438, "tid": 31367, "ts": 667918134946.744, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918134949.889, "dur": 7.531, + "args": { + "External id": 250616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918134953.490, "dur": 3.344, + "args": { + "External id": 250617,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[65536, 1024], [], []], "Ev Idx": 760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918134955.558, "dur": 0.993, + "args": { + "External id": 250618,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918134958.560, "dur": 73.589, + "args": { + "External id": 250619,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024]], "Ev Idx": 762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918135033.444, "dur": 5.753, + "args": { + "External id": 250620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918135034.261, "dur": 4.302, + "args": { + "External id": 250621,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918135035.919, "dur": 2.478, + "args": { + "External id": 250622,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918135042.432, "dur": 3.815, + "args": { + "External id": 250623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918135043.376, "dur": 2.323, + "args": { + "External id": 250624,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918135045.046, "dur": 0.583, + "args": { + "External id": 250625,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918135047.050, "dur": 35.253, + "args": { + "External id": 250626,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918135094.726, "dur": 10.020, + "args": { + "External id": 250627,"Record function id": 0, "Sequence number": 2987664, "Fwd thread id": 1, "Ev Idx": 770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918135095.838, "dur": 6.712, + "args": { + "External id": 250628,"Sequence number": 2987664, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 771 + } + }, + { + "ph": "f", "id": 80, "pid": 4183438, "tid": 31367, "ts": 667918135095.838, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918135097.702, "dur": 4.696, + "args": { + "External id": 250629,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918135098.814, "dur": 3.429, + "args": { + "External id": 250630,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918135108.037, "dur": 9.155, + "args": { + "External id": 250631,"Record function id": 0, "Sequence number": 2987663, "Fwd thread id": 1, "Ev Idx": 774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918135111.013, "dur": 4.167, + "args": { + "External id": 250632,"Sequence number": 2987663, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 775 + } + }, + { + "ph": "f", "id": 81, "pid": 4183438, "tid": 31367, "ts": 667918135111.013, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918135111.996, "dur": 2.999, + "args": { + "External id": 250633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918135112.712, "dur": 1.793, + "args": { + "External id": 250634,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918135113.951, "dur": 0.435, + "args": { + "External id": 250635,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918135121.276, "dur": 5.244, + "args": { + "External id": 250636,"Record function id": 0, "Ev Idx": 779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918135122.832, "dur": 3.169, + "args": { + "External id": 250637,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918135123.978, "dur": 1.764, + "args": { + "External id": 250638,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918135124.635, "dur": 1.030, + "args": { + "External id": 250639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918135129.625, "dur": 7.579, + "args": { + "External id": 250640,"Record function id": 0, "Sequence number": 2987662, "Fwd thread id": 1, "Ev Idx": 783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918135130.351, "dur": 4.911, + "args": { + "External id": 250641,"Sequence number": 2987662, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 784 + } + }, + { + "ph": "f", "id": 82, "pid": 4183438, "tid": 31367, "ts": 667918135130.351, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918135132.958, "dur": 2.144, + "args": { + "External id": 250642,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918135133.778, "dur": 1.194, + "args": { + "External id": 250643,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 4183438, "tid": 31367, + "ts": 667918135141.164, "dur": 301.153, + "args": { + "External id": 250644,"Record function id": 0, "Sequence number": 2987661, "Fwd thread id": 1, "Ev Idx": 787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 4183438, "tid": 31367, + "ts": 667918135142.632, "dur": 280.693, + "args": { + "External id": 250645,"Sequence number": 2987661, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 788 + } + }, + { + "ph": "f", "id": 83, "pid": 4183438, "tid": 31367, "ts": 667918135142.632, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918135157.512, "dur": 6.479, + "args": { + "External id": 250646,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918135160.347, "dur": 3.142, + "args": { + "External id": 250647,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918135165.930, "dur": 3.258, + "args": { + "External id": 250648,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918135167.209, "dur": 1.766, + "args": { + "External id": 250649,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918135170.996, "dur": 5.506, + "args": { + "External id": 250650,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918135173.140, "dur": 3.176, + "args": { + "External id": 250651,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918135219.864, "dur": 178.288, + "args": { + "External id": 250652,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918135301.181, "dur": 4.938, + "args": { + "External id": 250653,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918135308.353, "dur": 3.870, + "args": { + "External id": 250654,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 31367, + "ts": 667918135410.559, "dur": 3.913, + "args": { + "External id": 250655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 31367, + "ts": 667918135417.545, "dur": 0.680, + "args": { + "External id": 250656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 31367, + "ts": 667918135420.192, "dur": 0.553, + "args": { + "External id": 250657,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918135452.384, "dur": 261.531, + "args": { + "External id": 250658,"Record function id": 0, "Sequence number": 2987660, "Fwd thread id": 1, "Ev Idx": 801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918135454.220, "dur": 250.678, + "args": { + "External id": 250659,"Sequence number": 2987660, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 802 + } + }, + { + "ph": "f", "id": 84, "pid": 4183438, "tid": 31367, "ts": 667918135454.220, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 31367, + "ts": 667918135473.651, "dur": 42.833, + "args": { + "External id": 250660,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918135476.834, "dur": 2.827, + "args": { + "External id": 250661,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918135481.321, "dur": 34.553, + "args": { + "External id": 250662,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], []], "Ev Idx": 805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918135525.735, "dur": 4.063, + "args": { + "External id": 250663,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918135527.531, "dur": 1.928, + "args": { + "External id": 250664,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918135724.344, "dur": 153.571, + "args": { + "External id": 250665,"Record function id": 0, "Sequence number": 2987659, "Fwd thread id": 1, "Ev Idx": 808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918135726.189, "dur": 145.933, + "args": { + "External id": 250666,"Sequence number": 2987659, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 809 + } + }, + { + "ph": "f", "id": 85, "pid": 4183438, "tid": 31367, "ts": 667918135726.189, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 31367, + "ts": 667918135740.046, "dur": 34.413, + "args": { + "External id": 250667,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918135743.093, "dur": 3.509, + "args": { + "External id": 250668,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918135747.672, "dur": 26.197, + "args": { + "External id": 250669,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], []], "Ev Idx": 812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918135782.895, "dur": 4.613, + "args": { + "External id": 250670,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918135784.763, "dur": 2.445, + "args": { + "External id": 250671,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918135884.375, "dur": 16.853, + "args": { + "External id": 250672,"Record function id": 0, "Sequence number": 2987658, "Fwd thread id": 1, "Ev Idx": 815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918135885.632, "dur": 13.075, + "args": { + "External id": 250673,"Sequence number": 2987658, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 816 + } + }, + { + "ph": "f", "id": 86, "pid": 4183438, "tid": 31367, "ts": 667918135885.632, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918135888.161, "dur": 10.250, + "args": { + "External id": 250674,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918135891.963, "dur": 6.244, + "args": { + "External id": 250675,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918135904.850, "dur": 6.499, + "args": { + "External id": 250676,"Record function id": 0, "Sequence number": 2987657, "Fwd thread id": 1, "Ev Idx": 819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918135905.880, "dur": 3.416, + "args": { + "External id": 250677,"Sequence number": 2987657, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 820 + } + }, + { + "ph": "f", "id": 87, "pid": 4183438, "tid": 31367, "ts": 667918135905.880, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918135907.038, "dur": 2.127, + "args": { + "External id": 250678,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918135908.104, "dur": 0.942, + "args": { + "External id": 250679,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918135915.986, "dur": 5.348, + "args": { + "External id": 250680,"Record function id": 0, "Sequence number": 2987656, "Fwd thread id": 1, "Ev Idx": 823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918135916.691, "dur": 3.030, + "args": { + "External id": 250681,"Sequence number": 2987656, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 824 + } + }, + { + "ph": "f", "id": 88, "pid": 4183438, "tid": 31367, "ts": 667918135916.691, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918135917.835, "dur": 1.758, + "args": { + "External id": 250682,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918135918.785, "dur": 0.703, + "args": { + "External id": 250683,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918135924.748, "dur": 7.888, + "args": { + "External id": 250684,"Record function id": 0, "Sequence number": 2987655, "Fwd thread id": 1, "Ev Idx": 827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918135926.220, "dur": 4.796, + "args": { + "External id": 250685,"Sequence number": 2987655, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 828 + } + }, + { + "ph": "f", "id": 89, "pid": 4183438, "tid": 31367, "ts": 667918135926.220, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918135927.126, "dur": 3.734, + "args": { + "External id": 250686,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918135929.671, "dur": 1.096, + "args": { + "External id": 250687,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918135936.007, "dur": 154.584, + "args": { + "External id": 250688,"Record function id": 0, "Sequence number": 2987654, "Fwd thread id": 1, "Ev Idx": 831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918135936.669, "dur": 147.019, + "args": { + "External id": 250689,"Sequence number": 2987654, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 832 + } + }, + { + "ph": "f", "id": 90, "pid": 4183438, "tid": 31367, "ts": 667918135936.669, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918135940.314, "dur": 6.949, + "args": { + "External id": 250690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918135942.228, "dur": 4.398, + "args": { + "External id": 250691,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[65536, 1024], [], []], "Ev Idx": 834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918135944.716, "dur": 1.635, + "args": { + "External id": 250692,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918135950.492, "dur": 80.426, + "args": { + "External id": 250693,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024]], "Ev Idx": 836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918136032.335, "dur": 4.771, + "args": { + "External id": 250694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918136033.421, "dur": 2.957, + "args": { + "External id": 250695,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918136034.987, "dur": 1.171, + "args": { + "External id": 250696,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918136038.786, "dur": 5.191, + "args": { + "External id": 250697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918136039.727, "dur": 3.731, + "args": { + "External id": 250698,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918136042.837, "dur": 0.503, + "args": { + "External id": 250699,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918136044.539, "dur": 38.356, + "args": { + "External id": 250700,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136095.050, "dur": 7.008, + "args": { + "External id": 250701,"Record function id": 0, "Sequence number": 2987653, "Fwd thread id": 1, "Ev Idx": 844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136095.966, "dur": 4.521, + "args": { + "External id": 250702,"Sequence number": 2987653, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 845 + } + }, + { + "ph": "f", "id": 91, "pid": 4183438, "tid": 31367, "ts": 667918136095.966, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918136097.626, "dur": 2.725, + "args": { + "External id": 250703,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918136099.173, "dur": 1.032, + "args": { + "External id": 250704,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136105.272, "dur": 8.515, + "args": { + "External id": 250705,"Record function id": 0, "Sequence number": 2987652, "Fwd thread id": 1, "Ev Idx": 848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136106.175, "dur": 5.685, + "args": { + "External id": 250706,"Sequence number": 2987652, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 849 + } + }, + { + "ph": "f", "id": 92, "pid": 4183438, "tid": 31367, "ts": 667918136106.175, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918136107.116, "dur": 4.517, + "args": { + "External id": 250707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918136107.697, "dur": 3.482, + "args": { + "External id": 250708,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918136110.705, "dur": 0.350, + "args": { + "External id": 250709,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918136119.655, "dur": 9.703, + "args": { + "External id": 250710,"Record function id": 0, "Ev Idx": 853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918136121.684, "dur": 6.996, + "args": { + "External id": 250711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918136124.440, "dur": 3.840, + "args": { + "External id": 250712,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918136125.617, "dur": 2.573, + "args": { + "External id": 250713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136132.731, "dur": 5.866, + "args": { + "External id": 250714,"Record function id": 0, "Sequence number": 2987651, "Fwd thread id": 1, "Ev Idx": 857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136133.915, "dur": 2.889, + "args": { + "External id": 250715,"Sequence number": 2987651, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 858 + } + }, + { + "ph": "f", "id": 93, "pid": 4183438, "tid": 31367, "ts": 667918136133.915, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918136134.952, "dur": 1.695, + "args": { + "External id": 250716,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918136135.791, "dur": 0.703, + "args": { + "External id": 250717,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136141.957, "dur": 125.040, + "args": { + "External id": 250718,"Record function id": 0, "Sequence number": 2987650, "Fwd thread id": 1, "Ev Idx": 861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136142.671, "dur": 117.108, + "args": { + "External id": 250719,"Sequence number": 2987650, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 862 + } + }, + { + "ph": "f", "id": 94, "pid": 4183438, "tid": 31367, "ts": 667918136142.671, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918136146.797, "dur": 2.970, + "args": { + "External id": 250720,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918136147.495, "dur": 1.849, + "args": { + "External id": 250721,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[65536, 1024], [], []], "Ev Idx": 864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918136148.824, "dur": 0.391, + "args": { + "External id": 250722,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918136150.494, "dur": 37.098, + "args": { + "External id": 250723,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024]], "Ev Idx": 866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918136188.625, "dur": 23.300, + "args": { + "External id": 250724,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918136189.337, "dur": 21.605, + "args": { + "External id": 250725,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918136191.301, "dur": 19.064, + "args": { + "External id": 250726,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918136215.558, "dur": 5.937, + "args": { + "External id": 250727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918136216.706, "dur": 4.260, + "args": { + "External id": 250728,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918136218.481, "dur": 2.421, + "args": { + "External id": 250729,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918136222.328, "dur": 36.779, + "args": { + "External id": 250730,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136272.817, "dur": 35.197, + "args": { + "External id": 250731,"Record function id": 0, "Sequence number": 2987649, "Fwd thread id": 1, "Ev Idx": 874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136274.004, "dur": 4.557, + "args": { + "External id": 250732,"Sequence number": 2987649, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 875 + } + }, + { + "ph": "f", "id": 95, "pid": 4183438, "tid": 31367, "ts": 667918136274.004, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918136275.646, "dur": 2.773, + "args": { + "External id": 250733,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918136276.961, "dur": 1.316, + "args": { + "External id": 250734,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 31367, + "ts": 667918136281.516, "dur": 22.771, + "args": { + "External id": 250735,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136311.778, "dur": 7.534, + "args": { + "External id": 250736,"Record function id": 0, "Sequence number": 2987648, "Fwd thread id": 1, "Ev Idx": 879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136312.626, "dur": 4.961, + "args": { + "External id": 250737,"Sequence number": 2987648, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 880 + } + }, + { + "ph": "f", "id": 96, "pid": 4183438, "tid": 31367, "ts": 667918136312.626, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918136313.469, "dur": 3.921, + "args": { + "External id": 250738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918136314.301, "dur": 2.558, + "args": { + "External id": 250739,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918136316.147, "dur": 0.572, + "args": { + "External id": 250740,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918136323.467, "dur": 8.603, + "args": { + "External id": 250741,"Record function id": 0, "Ev Idx": 884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918136325.173, "dur": 6.317, + "args": { + "External id": 250742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918136326.444, "dur": 4.724, + "args": { + "External id": 250743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918136329.507, "dur": 1.544, + "args": { + "External id": 250744,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136335.334, "dur": 6.393, + "args": { + "External id": 250745,"Record function id": 0, "Sequence number": 2987647, "Fwd thread id": 1, "Ev Idx": 888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136336.686, "dur": 3.009, + "args": { + "External id": 250746,"Sequence number": 2987647, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 889 + } + }, + { + "ph": "f", "id": 97, "pid": 4183438, "tid": 31367, "ts": 667918136336.686, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918136337.861, "dur": 1.690, + "args": { + "External id": 250747,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918136338.545, "dur": 0.855, + "args": { + "External id": 250748,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136346.718, "dur": 98.499, + "args": { + "External id": 250749,"Record function id": 0, "Sequence number": 2987646, "Fwd thread id": 1, "Ev Idx": 892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136347.517, "dur": 90.849, + "args": { + "External id": 250750,"Sequence number": 2987646, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 893 + } + }, + { + "ph": "f", "id": 98, "pid": 4183438, "tid": 31367, "ts": 667918136347.517, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918136349.795, "dur": 5.079, + "args": { + "External id": 250751,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918136350.358, "dur": 4.045, + "args": { + "External id": 250752,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[65536, 1024], [], []], "Ev Idx": 895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918136353.788, "dur": 0.502, + "args": { + "External id": 250753,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918136355.633, "dur": 37.443, + "args": { + "External id": 250754,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024]], "Ev Idx": 897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918136394.046, "dur": 3.949, + "args": { + "External id": 250755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918136394.589, "dur": 2.807, + "args": { + "External id": 250756,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918136395.896, "dur": 1.365, + "args": { + "External id": 250757,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918136399.493, "dur": 7.070, + "args": { + "External id": 250758,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918136404.059, "dur": 2.029, + "args": { + "External id": 250759,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918136405.627, "dur": 0.386, + "args": { + "External id": 250760,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918136407.180, "dur": 30.478, + "args": { + "External id": 250761,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136449.568, "dur": 23.709, + "args": { + "External id": 250762,"Record function id": 0, "Sequence number": 2987645, "Fwd thread id": 1, "Ev Idx": 905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136450.396, "dur": 3.794, + "args": { + "External id": 250763,"Sequence number": 2987645, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 906 + } + }, + { + "ph": "f", "id": 99, "pid": 4183438, "tid": 31367, "ts": 667918136450.396, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918136451.897, "dur": 2.150, + "args": { + "External id": 250764,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918136452.666, "dur": 1.233, + "args": { + "External id": 250765,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 31367, + "ts": 667918136456.293, "dur": 15.049, + "args": { + "External id": 250766,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136476.749, "dur": 9.343, + "args": { + "External id": 250767,"Record function id": 0, "Sequence number": 2987644, "Fwd thread id": 1, "Ev Idx": 910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136479.753, "dur": 4.687, + "args": { + "External id": 250768,"Sequence number": 2987644, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 911 + } + }, + { + "ph": "f", "id": 100, "pid": 4183438, "tid": 31367, "ts": 667918136479.753, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918136480.594, "dur": 3.628, + "args": { + "External id": 250769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918136481.411, "dur": 2.262, + "args": { + "External id": 250770,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918136483.120, "dur": 0.444, + "args": { + "External id": 250771,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918136490.157, "dur": 4.738, + "args": { + "External id": 250772,"Record function id": 0, "Ev Idx": 915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918136491.768, "dur": 2.692, + "args": { + "External id": 250773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918136492.591, "dur": 1.567, + "args": { + "External id": 250774,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918136493.204, "dur": 0.831, + "args": { + "External id": 250775,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918136499.299, "dur": 401.870, + "args": { + "External id": 250776,"Record function id": 0, "Sequence number": 2987643, "Fwd thread id": 1, "Ev Idx": 919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918136500.662, "dur": 364.341, + "args": { + "External id": 250777,"Sequence number": 2987643, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 920 + } + }, + { + "ph": "f", "id": 101, "pid": 4183438, "tid": 31367, "ts": 667918136500.662, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918136533.820, "dur": 3.582, + "args": { + "External id": 250778,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918136536.212, "dur": 1.002, + "args": { + "External id": 250779,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918136552.918, "dur": 5.674, + "args": { + "External id": 250780,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918136568.218, "dur": 2.964, + "args": { + "External id": 250781,"Record function id": 0, "Concrete Inputs": ["[132, 1024]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918136754.529, "dur": 3.135, + "args": { + "External id": 250782,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 1024]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[132, 1024], []], "Ev Idx": 925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 31367, + "ts": 667918136762.850, "dur": 37.797, + "args": { + "External id": 250783,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[135168, 1024, 1], [], [], []], "Input Dims": [[1, 132, 1024], [], [], []], "Ev Idx": 926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918136774.830, "dur": 0.945, + "args": { + "External id": 250784,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 1024]", "[1024, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1, 1024], [], [], []], "Ev Idx": 927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918136809.352, "dur": 33.212, + "args": { + "External id": 250785,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[1, 1024], [], [], [], [], []], "Ev Idx": 928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918136811.126, "dur": 31.185, + "args": { + "External id": 250786,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], [], []], "Input Dims": [[1, 1024], [], [], [], [], [], []], "Ev Idx": 929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918136817.281, "dur": 5.568, + "args": { + "External id": 250787,"Record function id": 0, "Concrete Inputs": ["[1, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918136824.632, "dur": 17.149, + "args": { + "External id": 250788,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[1, 1024], [1, 1024], []], "Ev Idx": 931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 31367, + "ts": 667918136847.202, "dur": 2.575, + "args": { + "External id": 250789,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1]], "Input Dims": [[1, 1024], [1024]], "Ev Idx": 932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918136848.557, "dur": 1.123, + "args": { + "External id": 250790,"Record function id": 0, "Concrete Inputs": ["", "[1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[1, 1024], []], "Ev Idx": 933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918136855.776, "dur": 1.923, + "args": { + "External id": 250791,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918136856.871, "dur": 0.719, + "args": { + "External id": 250792,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 31367, + "ts": 667918136877.419, "dur": 20.218, + "args": { + "External id": 250793,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918136913.783, "dur": 9.268, + "args": { + "External id": 250794,"Record function id": 0, "Ev Idx": 937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918136916.346, "dur": 5.970, + "args": { + "External id": 250795,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918136918.471, "dur": 2.905, + "args": { + "External id": 250796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918136919.463, "dur": 1.823, + "args": { + "External id": 250797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136926.704, "dur": 5.722, + "args": { + "External id": 250798,"Record function id": 0, "Sequence number": 2987642, "Fwd thread id": 1, "Ev Idx": 941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918136927.862, "dur": 1.078, + "args": { + "External id": 250799,"Sequence number": 2987642, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 942 + } + }, + { + "ph": "f", "id": 102, "pid": 4183438, "tid": 31367, "ts": 667918136927.862, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918136936.132, "dur": 415.081, + "args": { + "External id": 250800,"Record function id": 0, "Sequence number": 2987641, "Fwd thread id": 1, "Ev Idx": 943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918136937.478, "dur": 401.520, + "args": { + "External id": 250801,"Sequence number": 2987641, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 944 + } + }, + { + "ph": "f", "id": 103, "pid": 4183438, "tid": 31367, "ts": 667918136937.478, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918136967.507, "dur": 9.028, + "args": { + "External id": 250802,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 4183438, "tid": 31367, + "ts": 667918136973.236, "dur": 3.007, + "args": { + "External id": 250803,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]", "[4096, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[16777216, 4096, 1], [], []], "Input Dims": [[16, 4096, 1024], [], []], "Ev Idx": 946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918136979.568, "dur": 5.043, + "args": { + "External id": 250804,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918136980.802, "dur": 3.122, + "args": { + "External id": 250805,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2816, 1], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918136983.043, "dur": 0.689, + "args": { + "External id": 250806,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 31367, + "ts": 667918136988.295, "dur": 84.840, + "args": { + "External id": 250807,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [1, 2816], []], "Input Dims": [[65536, 1024], [2816, 1024], []], "Ev Idx": 950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918136989.162, "dur": 6.502, + "args": { + "External id": 250808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2816]], "Input Dims": [[2816, 1024]], "Ev Idx": 951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918136991.588, "dur": 3.626, + "args": { + "External id": 250809,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2816], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918136993.016, "dur": 2.120, + "args": { + "External id": 250810,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[2816, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2816], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 31367, + "ts": 667918136997.003, "dur": 75.597, + "args": { + "External id": 250811,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816]], "Ev Idx": 954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918136998.155, "dur": 73.431, + "args": { + "External id": 250812,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816]], "Ev Idx": 955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 31367, + "ts": 667918137076.719, "dur": 2.814, + "args": { + "External id": 250813,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [11534336, 2816, 1]], "Input Dims": [[65536, 2816], [16, 4096, 2816]], "Ev Idx": 956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918137078.083, "dur": 1.312, + "args": { + "External id": 250814,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1], []], "Input Dims": [[65536, 2816], []], "Ev Idx": 957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918137112.914, "dur": 5.401, + "args": { + "External id": 250815,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2816]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918137119.840, "dur": 1.539, + "args": { + "External id": 250816,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2816]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918137122.446, "dur": 1.638, + "args": { + "External id": 250817,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2816]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918137155.801, "dur": 2.342, + "args": { + "External id": 250818,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918137156.943, "dur": 1.057, + "args": { + "External id": 250819,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 4183438, "tid": 31367, + "ts": 667918137180.712, "dur": 137.561, + "args": { + "External id": 250820,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[4096, 1], [2816, 1]], []], "Input Dims": [[], [[65536, 1024], [65536, 2816]], []], "Ev Idx": 963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 31367, + "ts": 667918137185.729, "dur": 5.099, + "args": { + "External id": 250821,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918137189.422, "dur": 0.721, + "args": { + "External id": 250822,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024, 1]", "[4096, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918137209.072, "dur": 9.008, + "args": { + "External id": 250823,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1, 1], []], "Input Dims": [[65536, 1024, 1], []], "Ev Idx": 966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918137214.758, "dur": 2.326, + "args": { + "External id": 250824,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1, 65536]", "[1, 1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 1], [], [], []], "Input Dims": [[65536, 1024, 1], [], [], []], "Ev Idx": 967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 31367, + "ts": 667918137219.583, "dur": 2.671, + "args": { + "External id": 250825,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], []], "Input Dims": [[65536, 2816], []], "Ev Idx": 968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918137221.526, "dur": 0.399, + "args": { + "External id": 250826,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816, 1]", "[2816, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[65536, 2816], [], [], []], "Ev Idx": 969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918137223.622, "dur": 2.964, + "args": { + "External id": 250827,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1, 1], []], "Input Dims": [[65536, 2816, 1], []], "Ev Idx": 970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918137225.189, "dur": 0.759, + "args": { + "External id": 250828,"Record function id": 0, "Concrete Inputs": ["", "[1, 2816, 65536]", "[1, 1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1, 1], [], [], []], "Input Dims": [[65536, 2816, 1], [], [], []], "Ev Idx": 971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918137230.493, "dur": 3.038, + "args": { + "External id": 250829,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 4096], []], "Input Dims": [[1024, 1, 65536], []], "Ev Idx": 972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918137232.618, "dur": 0.592, + "args": { + "External id": 250830,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536, 1]", "[1, 4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 4096], [], [], []], "Input Dims": [[1024, 1, 65536], [], [], []], "Ev Idx": 973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918137234.613, "dur": 6.329, + "args": { + "External id": 250831,"Record function id": 0, "Concrete Inputs": ["", "[1, 1024, 65536]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 4096, 1], []], "Input Dims": [[1024, 65536, 1], []], "Ev Idx": 974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 4183438, "tid": 31367, + "ts": 667918137238.673, "dur": 2.099, + "args": { + "External id": 250832,"Record function id": 0, "Concrete Inputs": ["", "[1, 1024, 65536]", "[1024, 1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 4096, 1], [], []], "Input Dims": [[1024, 65536, 1], [], []], "Ev Idx": 975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918137241.924, "dur": 2.696, + "args": { + "External id": 250833,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 2816], []], "Input Dims": [[1, 2816, 65536], []], "Ev Idx": 976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918137243.821, "dur": 0.488, + "args": { + "External id": 250834,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816, 1]", "[2816, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 2816], [], [], []], "Input Dims": [[1, 2816, 65536], [], [], []], "Ev Idx": 977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918137247.717, "dur": 3.224, + "args": { + "External id": 250835,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1, 1], []], "Input Dims": [[65536, 2816, 1], []], "Ev Idx": 978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918137248.630, "dur": 2.220, + "args": { + "External id": 250836,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1, 1], []], "Input Dims": [[65536, 2816, 1], []], "Ev Idx": 979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918137252.357, "dur": 52.398, + "args": { + "External id": 250837,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1, 4096], [184549376, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816]], "Ev Idx": 980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918137306.745, "dur": 3.300, + "args": { + "External id": 250838,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2883584, 2816, 1], []], "Input Dims": [[1, 1024, 2816], []], "Ev Idx": 981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918137310.941, "dur": 3.281, + "args": { + "External id": 250839,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 2816, 1], []], "Input Dims": [[1024, 1, 2816], []], "Ev Idx": 982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918137313.172, "dur": 0.512, + "args": { + "External id": 250840,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816, 1]", "[2816, 1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 2816, 1], [], [], []], "Input Dims": [[1024, 1, 2816], [], [], []], "Ev Idx": 983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918137316.293, "dur": 0.865, + "args": { + "External id": 250841,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1, 2816], []], "Input Dims": [[1024, 2816, 1], []], "Ev Idx": 984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918137362.543, "dur": 9.684, + "args": { + "External id": 250842,"Record function id": 0, "Ev Idx": 985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918137364.962, "dur": 6.592, + "args": { + "External id": 250843,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918137367.129, "dur": 3.507, + "args": { + "External id": 250844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918137368.195, "dur": 2.321, + "args": { + "External id": 250845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918137376.293, "dur": 6.455, + "args": { + "External id": 250846,"Record function id": 0, "Sequence number": 2987640, "Fwd thread id": 1, "Ev Idx": 989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918137377.769, "dur": 3.392, + "args": { + "External id": 250847,"Sequence number": 2987640, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[11534336, 2816, 1]], "Input Dims": [[16, 4096, 2816]], "Ev Idx": 990 + } + }, + { + "ph": "f", "id": 104, "pid": 4183438, "tid": 31367, "ts": 667918137377.769, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918137379.199, "dur": 1.766, + "args": { + "External id": 250848,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918137379.925, "dur": 0.910, + "args": { + "External id": 250849,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918137386.465, "dur": 107.278, + "args": { + "External id": 250850,"Record function id": 0, "Sequence number": 2987639, "Fwd thread id": 1, "Ev Idx": 993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918137387.440, "dur": 100.617, + "args": { + "External id": 250851,"Sequence number": 2987639, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[65536, 2816]], "Ev Idx": 994 + } + }, + { + "ph": "f", "id": 105, "pid": 4183438, "tid": 31367, "ts": 667918137387.440, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918137390.718, "dur": 5.896, + "args": { + "External id": 250852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[65536, 2816]], "Ev Idx": 995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918137391.883, "dur": 4.131, + "args": { + "External id": 250853,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2816, 1], [], []], "Input Dims": [[65536, 2816], [], []], "Ev Idx": 996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918137395.034, "dur": 0.792, + "args": { + "External id": 250854,"Record function id": 0, "Concrete Inputs": ["", "[2816, 65536]", "[1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[65536, 2816], [], [], []], "Ev Idx": 997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918137397.554, "dur": 37.671, + "args": { + "External id": 250855,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024]], "Ev Idx": 998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918137436.096, "dur": 3.721, + "args": { + "External id": 250856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918137436.618, "dur": 2.561, + "args": { + "External id": 250857,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 1000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918137438.163, "dur": 0.866, + "args": { + "External id": 250858,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 1001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918137441.262, "dur": 5.268, + "args": { + "External id": 250859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 1002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918137443.997, "dur": 2.065, + "args": { + "External id": 250860,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 1003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918137445.617, "dur": 0.334, + "args": { + "External id": 250861,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 1004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918137447.024, "dur": 40.067, + "args": { + "External id": 250862,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024]], "Ev Idx": 1005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918137498.511, "dur": 6.311, + "args": { + "External id": 250863,"Record function id": 0, "Sequence number": 2987638, "Fwd thread id": 1, "Ev Idx": 1006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918137499.315, "dur": 4.264, + "args": { + "External id": 250864,"Sequence number": 2987638, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1007 + } + }, + { + "ph": "f", "id": 106, "pid": 4183438, "tid": 31367, "ts": 667918137499.315, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918137501.020, "dur": 2.412, + "args": { + "External id": 250865,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918137502.262, "dur": 1.032, + "args": { + "External id": 250866,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918137508.225, "dur": 9.060, + "args": { + "External id": 250867,"Record function id": 0, "Sequence number": 2987637, "Fwd thread id": 1, "Ev Idx": 1010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918137509.085, "dur": 5.941, + "args": { + "External id": 250868,"Sequence number": 2987637, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 1011 + } + }, + { + "ph": "f", "id": 107, "pid": 4183438, "tid": 31367, "ts": 667918137509.085, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918137509.831, "dur": 4.975, + "args": { + "External id": 250869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 1012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918137511.940, "dur": 2.364, + "args": { + "External id": 250870,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 1013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918137513.334, "dur": 0.799, + "args": { + "External id": 250871,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 1014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918137521.205, "dur": 4.807, + "args": { + "External id": 250872,"Record function id": 0, "Ev Idx": 1015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918137522.611, "dur": 2.934, + "args": { + "External id": 250873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918137523.664, "dur": 1.650, + "args": { + "External id": 250874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918137524.318, "dur": 0.881, + "args": { + "External id": 250875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918137530.905, "dur": 5.822, + "args": { + "External id": 250876,"Record function id": 0, "Sequence number": 2987636, "Fwd thread id": 1, "Ev Idx": 1019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918137532.055, "dur": 2.676, + "args": { + "External id": 250877,"Sequence number": 2987636, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[11534336, 2816, 1]], "Input Dims": [[16, 4096, 2816]], "Ev Idx": 1020 + } + }, + { + "ph": "f", "id": 108, "pid": 4183438, "tid": 31367, "ts": 667918137532.055, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918137533.070, "dur": 1.525, + "args": { + "External id": 250878,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 1021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918137533.679, "dur": 0.772, + "args": { + "External id": 250879,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 1022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918137541.691, "dur": 104.283, + "args": { + "External id": 250880,"Record function id": 0, "Sequence number": 2987635, "Fwd thread id": 1, "Ev Idx": 1023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918137559.370, "dur": 79.857, + "args": { + "External id": 250881,"Sequence number": 2987635, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[65536, 2816]], "Ev Idx": 1024 + } + }, + { + "ph": "f", "id": 109, "pid": 4183438, "tid": 31367, "ts": 667918137559.370, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918137561.502, "dur": 2.797, + "args": { + "External id": 250882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[65536, 2816]], "Ev Idx": 1025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918137561.967, "dur": 1.869, + "args": { + "External id": 250883,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2816, 1], [], []], "Input Dims": [[65536, 2816], [], []], "Ev Idx": 1026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918137563.316, "dur": 0.418, + "args": { + "External id": 250884,"Record function id": 0, "Concrete Inputs": ["", "[2816, 65536]", "[1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[65536, 2816], [], [], []], "Ev Idx": 1027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918137564.777, "dur": 27.973, + "args": { + "External id": 250885,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024]], "Ev Idx": 1028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918137594.927, "dur": 4.344, + "args": { + "External id": 250886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918137595.399, "dur": 3.342, + "args": { + "External id": 250887,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 1030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918137596.701, "dur": 1.888, + "args": { + "External id": 250888,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 1031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918137600.163, "dur": 4.507, + "args": { + "External id": 250889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 1032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918137600.909, "dur": 3.335, + "args": { + "External id": 250890,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 1033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918137602.130, "dur": 2.050, + "args": { + "External id": 250891,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 1034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918137607.047, "dur": 31.538, + "args": { + "External id": 250892,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024]], "Ev Idx": 1035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918137650.331, "dur": 77.607, + "args": { + "External id": 250893,"Record function id": 0, "Sequence number": 2987634, "Fwd thread id": 1, "Ev Idx": 1036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918137651.289, "dur": 42.631, + "args": { + "External id": 250894,"Sequence number": 2987634, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1037 + } + }, + { + "ph": "f", "id": 110, "pid": 4183438, "tid": 31367, "ts": 667918137651.289, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918137652.752, "dur": 40.919, + "args": { + "External id": 250895,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918137653.674, "dur": 39.448, + "args": { + "External id": 250896,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 31367, + "ts": 667918137698.543, "dur": 26.189, + "args": { + "External id": 250897,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 1040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918137733.242, "dur": 9.815, + "args": { + "External id": 250898,"Record function id": 0, "Sequence number": 2987633, "Fwd thread id": 1, "Ev Idx": 1041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918137734.625, "dur": 6.505, + "args": { + "External id": 250899,"Sequence number": 2987633, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 1042 + } + }, + { + "ph": "f", "id": 111, "pid": 4183438, "tid": 31367, "ts": 667918137734.625, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918137735.340, "dur": 5.564, + "args": { + "External id": 250900,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 1043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918137736.069, "dur": 4.231, + "args": { + "External id": 250901,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 1044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918137739.622, "dur": 0.547, + "args": { + "External id": 250902,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 1045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918137747.241, "dur": 5.502, + "args": { + "External id": 250903,"Record function id": 0, "Ev Idx": 1046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918137748.730, "dur": 3.458, + "args": { + "External id": 250904,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918137749.791, "dur": 1.865, + "args": { + "External id": 250905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918137750.297, "dur": 1.249, + "args": { + "External id": 250906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918137757.249, "dur": 409.080, + "args": { + "External id": 250907,"Record function id": 0, "Sequence number": 2987632, "Fwd thread id": 1, "Ev Idx": 1050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918137758.877, "dur": 376.796, + "args": { + "External id": 250908,"Sequence number": 2987632, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [16777216, 4096, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 1051 + } + }, + { + "ph": "f", "id": 112, "pid": 4183438, "tid": 31367, "ts": 667918137758.877, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 4183438, "tid": 31367, + "ts": 667918137785.873, "dur": 33.919, + "args": { + "External id": 250909,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 31367, + "ts": 667918137787.157, "dur": 32.398, + "args": { + "External id": 250910,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918137789.960, "dur": 5.934, + "args": { + "External id": 250911,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], [], [], []], "Ev Idx": 1054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918137792.555, "dur": 2.877, + "args": { + "External id": 250912,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 1024]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918137797.274, "dur": 21.847, + "args": { + "External id": 250913,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 1056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918137831.805, "dur": 2.646, + "args": { + "External id": 250914,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918137832.895, "dur": 1.411, + "args": { + "External id": 250915,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918137840.033, "dur": 3.025, + "args": { + "External id": 250916,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918137840.742, "dur": 2.225, + "args": { + "External id": 250917,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918137855.177, "dur": 2.064, + "args": { + "External id": 250918,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918137868.511, "dur": 3.157, + "args": { + "External id": 250919,"Record function id": 0, "Concrete Inputs": ["[132, 1024]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918138032.785, "dur": 2.536, + "args": { + "External id": 250920,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 1024]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[132, 1024], []], "Ev Idx": 1063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 31367, + "ts": 667918138039.676, "dur": 31.133, + "args": { + "External id": 250921,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[135168, 1024, 1], [], [], []], "Input Dims": [[1, 132, 1024], [], [], []], "Ev Idx": 1064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918138047.663, "dur": 0.841, + "args": { + "External id": 250922,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 1024]", "[1024, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1, 1024], [], [], []], "Ev Idx": 1065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918138075.958, "dur": 26.934, + "args": { + "External id": 250923,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[1, 1024], [], [], [], [], []], "Ev Idx": 1066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918138077.553, "dur": 25.106, + "args": { + "External id": 250924,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], [], []], "Input Dims": [[1, 1024], [], [], [], [], [], []], "Ev Idx": 1067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918138081.573, "dur": 4.044, + "args": { + "External id": 250925,"Record function id": 0, "Concrete Inputs": ["[1, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918138086.955, "dur": 15.128, + "args": { + "External id": 250926,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[1, 1024], [1, 1024], []], "Ev Idx": 1069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 31367, + "ts": 667918138107.161, "dur": 2.462, + "args": { + "External id": 250927,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1]], "Input Dims": [[1, 1024], [1024]], "Ev Idx": 1070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918138108.473, "dur": 1.004, + "args": { + "External id": 250928,"Record function id": 0, "Concrete Inputs": ["", "[1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[1, 1024], []], "Ev Idx": 1071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918138117.947, "dur": 2.778, + "args": { + "External id": 250929,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918138119.194, "dur": 1.357, + "args": { + "External id": 250930,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918138122.957, "dur": 3.765, + "args": { + "External id": 250931,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918138124.125, "dur": 2.494, + "args": { + "External id": 250932,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 31367, + "ts": 667918138148.191, "dur": 16.723, + "args": { + "External id": 250933,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 1076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918138175.501, "dur": 9.690, + "args": { + "External id": 250934,"Record function id": 0, "Ev Idx": 1077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918138177.545, "dur": 7.014, + "args": { + "External id": 250935,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918138179.322, "dur": 4.448, + "args": { + "External id": 250936,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918138181.884, "dur": 1.756, + "args": { + "External id": 250937,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918138189.068, "dur": 29.131, + "args": { + "External id": 250938,"Record function id": 0, "Sequence number": 2987631, "Fwd thread id": 1, "Ev Idx": 1081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918138190.346, "dur": 23.679, + "args": { + "External id": 250939,"Sequence number": 2987631, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1082 + } + }, + { + "ph": "f", "id": 113, "pid": 4183438, "tid": 31367, "ts": 667918138190.346, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918138209.968, "dur": 3.847, + "args": { + "External id": 250940,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918138211.509, "dur": 2.044, + "args": { + "External id": 250941,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918138223.415, "dur": 143.258, + "args": { + "External id": 250942,"Record function id": 0, "Sequence number": 2987630, "Fwd thread id": 1, "Ev Idx": 1085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918138224.420, "dur": 135.660, + "args": { + "External id": 250943,"Sequence number": 2987630, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1086 + } + }, + { + "ph": "f", "id": 114, "pid": 4183438, "tid": 31367, "ts": 667918138224.420, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918138228.153, "dur": 6.659, + "args": { + "External id": 250944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918138229.439, "dur": 4.776, + "args": { + "External id": 250945,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[65536, 1024], [], []], "Ev Idx": 1088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918138232.865, "dur": 1.095, + "args": { + "External id": 250946,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 1089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918138235.823, "dur": 70.978, + "args": { + "External id": 250947,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024]], "Ev Idx": 1090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918138308.033, "dur": 5.832, + "args": { + "External id": 250948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918138309.291, "dur": 4.062, + "args": { + "External id": 250949,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918138311.144, "dur": 2.020, + "args": { + "External id": 250950,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918138315.591, "dur": 5.129, + "args": { + "External id": 250951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918138318.364, "dur": 1.904, + "args": { + "External id": 250952,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918138319.853, "dur": 0.344, + "args": { + "External id": 250953,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918138321.375, "dur": 37.940, + "args": { + "External id": 250954,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 1097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918138371.817, "dur": 9.192, + "args": { + "External id": 250955,"Record function id": 0, "Sequence number": 2987629, "Fwd thread id": 1, "Ev Idx": 1098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918138372.831, "dur": 6.158, + "args": { + "External id": 250956,"Sequence number": 2987629, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1099 + } + }, + { + "ph": "f", "id": 115, "pid": 4183438, "tid": 31367, "ts": 667918138372.831, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918138374.534, "dur": 4.297, + "args": { + "External id": 250957,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918138375.622, "dur": 3.104, + "args": { + "External id": 250958,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918138384.593, "dur": 8.644, + "args": { + "External id": 250959,"Record function id": 0, "Sequence number": 2987628, "Fwd thread id": 1, "Ev Idx": 1102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918138385.370, "dur": 5.738, + "args": { + "External id": 250960,"Sequence number": 2987628, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1103 + } + }, + { + "ph": "f", "id": 116, "pid": 4183438, "tid": 31367, "ts": 667918138385.370, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918138386.095, "dur": 4.791, + "args": { + "External id": 250961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918138388.281, "dur": 2.142, + "args": { + "External id": 250962,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918138389.766, "dur": 0.523, + "args": { + "External id": 250963,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918138397.343, "dur": 6.009, + "args": { + "External id": 250964,"Record function id": 0, "Ev Idx": 1107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918138398.894, "dur": 3.965, + "args": { + "External id": 250965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918138400.067, "dur": 2.473, + "args": { + "External id": 250966,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918138400.932, "dur": 1.498, + "args": { + "External id": 250967,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918138407.907, "dur": 6.481, + "args": { + "External id": 250968,"Record function id": 0, "Sequence number": 2987627, "Fwd thread id": 1, "Ev Idx": 1111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918138408.847, "dur": 3.562, + "args": { + "External id": 250969,"Sequence number": 2987627, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1112 + } + }, + { + "ph": "f", "id": 117, "pid": 4183438, "tid": 31367, "ts": 667918138408.847, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918138410.146, "dur": 2.112, + "args": { + "External id": 250970,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918138410.920, "dur": 1.186, + "args": { + "External id": 250971,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 4183438, "tid": 31367, + "ts": 667918138418.259, "dur": 333.531, + "args": { + "External id": 250972,"Record function id": 0, "Sequence number": 2987626, "Fwd thread id": 1, "Ev Idx": 1115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 4183438, "tid": 31367, + "ts": 667918138419.558, "dur": 314.695, + "args": { + "External id": 250973,"Sequence number": 2987626, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1116 + } + }, + { + "ph": "f", "id": 118, "pid": 4183438, "tid": 31367, "ts": 667918138419.558, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918138434.914, "dur": 7.239, + "args": { + "External id": 250974,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 1117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918138437.731, "dur": 3.994, + "args": { + "External id": 250975,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918138444.178, "dur": 5.118, + "args": { + "External id": 250976,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 1119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918138445.583, "dur": 3.531, + "args": { + "External id": 250977,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918138457.618, "dur": 5.039, + "args": { + "External id": 250978,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 1121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918138460.711, "dur": 1.779, + "args": { + "External id": 250979,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918138489.234, "dur": 214.709, + "args": { + "External id": 250980,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918138566.597, "dur": 4.726, + "args": { + "External id": 250981,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918138572.774, "dur": 4.179, + "args": { + "External id": 250982,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 31367, + "ts": 667918138719.773, "dur": 4.847, + "args": { + "External id": 250983,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 31367, + "ts": 667918138728.199, "dur": 0.833, + "args": { + "External id": 250984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 31367, + "ts": 667918138731.025, "dur": 0.757, + "args": { + "External id": 250985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918138762.268, "dur": 234.740, + "args": { + "External id": 250986,"Record function id": 0, "Sequence number": 2987625, "Fwd thread id": 1, "Ev Idx": 1129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918138764.479, "dur": 226.356, + "args": { + "External id": 250987,"Sequence number": 2987625, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1130 + } + }, + { + "ph": "f", "id": 119, "pid": 4183438, "tid": 31367, "ts": 667918138764.479, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 31367, + "ts": 667918138785.460, "dur": 50.946, + "args": { + "External id": 250988,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 1131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918138788.943, "dur": 3.997, + "args": { + "External id": 250989,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918138794.518, "dur": 41.118, + "args": { + "External id": 250990,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], []], "Ev Idx": 1133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918138846.712, "dur": 5.810, + "args": { + "External id": 250991,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 1134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918138848.495, "dur": 3.637, + "args": { + "External id": 250992,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918139004.095, "dur": 150.043, + "args": { + "External id": 250993,"Record function id": 0, "Sequence number": 2987624, "Fwd thread id": 1, "Ev Idx": 1136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918139006.117, "dur": 142.010, + "args": { + "External id": 250994,"Sequence number": 2987624, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1137 + } + }, + { + "ph": "f", "id": 120, "pid": 4183438, "tid": 31367, "ts": 667918139006.117, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 31367, + "ts": 667918139017.604, "dur": 37.873, + "args": { + "External id": 250995,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 1138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918139020.466, "dur": 2.343, + "args": { + "External id": 250996,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918139027.170, "dur": 27.711, + "args": { + "External id": 250997,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], []], "Ev Idx": 1140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918139063.292, "dur": 4.136, + "args": { + "External id": 250998,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 1141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918139065.011, "dur": 2.125, + "args": { + "External id": 250999,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139159.791, "dur": 14.348, + "args": { + "External id": 251000,"Record function id": 0, "Sequence number": 2987623, "Fwd thread id": 1, "Ev Idx": 1143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139160.904, "dur": 10.925, + "args": { + "External id": 251001,"Sequence number": 2987623, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1144 + } + }, + { + "ph": "f", "id": 121, "pid": 4183438, "tid": 31367, "ts": 667918139160.904, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918139163.396, "dur": 8.171, + "args": { + "External id": 251002,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 1145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918139164.906, "dur": 6.477, + "args": { + "External id": 251003,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 1146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139177.596, "dur": 7.947, + "args": { + "External id": 251004,"Record function id": 0, "Sequence number": 2987622, "Fwd thread id": 1, "Ev Idx": 1147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139180.266, "dur": 3.298, + "args": { + "External id": 251005,"Sequence number": 2987622, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1148 + } + }, + { + "ph": "f", "id": 122, "pid": 4183438, "tid": 31367, "ts": 667918139180.266, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918139181.542, "dur": 1.886, + "args": { + "External id": 251006,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 1149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918139182.502, "dur": 0.772, + "args": { + "External id": 251007,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 1150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139188.649, "dur": 24.803, + "args": { + "External id": 251008,"Record function id": 0, "Sequence number": 2987621, "Fwd thread id": 1, "Ev Idx": 1151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139189.456, "dur": 21.656, + "args": { + "External id": 251009,"Sequence number": 2987621, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1152 + } + }, + { + "ph": "f", "id": 123, "pid": 4183438, "tid": 31367, "ts": 667918139189.456, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918139191.066, "dur": 19.866, + "args": { + "External id": 251010,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 1153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918139209.149, "dur": 1.475, + "args": { + "External id": 251011,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 1154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139218.344, "dur": 8.435, + "args": { + "External id": 251012,"Record function id": 0, "Sequence number": 2987620, "Fwd thread id": 1, "Ev Idx": 1155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139219.796, "dur": 5.187, + "args": { + "External id": 251013,"Sequence number": 2987620, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1156 + } + }, + { + "ph": "f", "id": 124, "pid": 4183438, "tid": 31367, "ts": 667918139219.796, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918139221.006, "dur": 3.833, + "args": { + "External id": 251014,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918139223.669, "dur": 1.069, + "args": { + "External id": 251015,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139230.328, "dur": 155.847, + "args": { + "External id": 251016,"Record function id": 0, "Sequence number": 2987619, "Fwd thread id": 1, "Ev Idx": 1159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139231.665, "dur": 146.812, + "args": { + "External id": 251017,"Sequence number": 2987619, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1160 + } + }, + { + "ph": "f", "id": 125, "pid": 4183438, "tid": 31367, "ts": 667918139231.665, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918139235.260, "dur": 6.349, + "args": { + "External id": 251018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918139236.907, "dur": 4.033, + "args": { + "External id": 251019,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[65536, 1024], [], []], "Ev Idx": 1162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918139239.205, "dur": 1.454, + "args": { + "External id": 251020,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 1163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918139242.884, "dur": 81.507, + "args": { + "External id": 251021,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024]], "Ev Idx": 1164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918139327.660, "dur": 5.047, + "args": { + "External id": 251022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918139328.706, "dur": 3.101, + "args": { + "External id": 251023,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918139330.619, "dur": 1.023, + "args": { + "External id": 251024,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918139334.312, "dur": 3.267, + "args": { + "External id": 251025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918139335.252, "dur": 1.894, + "args": { + "External id": 251026,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918139336.635, "dur": 0.447, + "args": { + "External id": 251027,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918139340.043, "dur": 37.713, + "args": { + "External id": 251028,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 1171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139393.215, "dur": 6.488, + "args": { + "External id": 251029,"Record function id": 0, "Sequence number": 2987618, "Fwd thread id": 1, "Ev Idx": 1172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139394.087, "dur": 4.179, + "args": { + "External id": 251030,"Sequence number": 2987618, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1173 + } + }, + { + "ph": "f", "id": 126, "pid": 4183438, "tid": 31367, "ts": 667918139394.087, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918139395.678, "dur": 2.449, + "args": { + "External id": 251031,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918139396.628, "dur": 1.357, + "args": { + "External id": 251032,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139403.119, "dur": 8.696, + "args": { + "External id": 251033,"Record function id": 0, "Sequence number": 2987617, "Fwd thread id": 1, "Ev Idx": 1176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139404.031, "dur": 5.789, + "args": { + "External id": 251034,"Sequence number": 2987617, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1177 + } + }, + { + "ph": "f", "id": 127, "pid": 4183438, "tid": 31367, "ts": 667918139404.031, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918139404.882, "dur": 4.739, + "args": { + "External id": 251035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918139405.415, "dur": 3.735, + "args": { + "External id": 251036,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918139408.509, "dur": 0.498, + "args": { + "External id": 251037,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918139417.450, "dur": 9.302, + "args": { + "External id": 251038,"Record function id": 0, "Ev Idx": 1181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918139419.340, "dur": 6.799, + "args": { + "External id": 251039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918139422.007, "dur": 3.759, + "args": { + "External id": 251040,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918139423.119, "dur": 2.533, + "args": { + "External id": 251041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139430.084, "dur": 6.452, + "args": { + "External id": 251042,"Record function id": 0, "Sequence number": 2987616, "Fwd thread id": 1, "Ev Idx": 1185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139431.753, "dur": 2.866, + "args": { + "External id": 251043,"Sequence number": 2987616, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1186 + } + }, + { + "ph": "f", "id": 128, "pid": 4183438, "tid": 31367, "ts": 667918139431.753, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918139432.734, "dur": 1.742, + "args": { + "External id": 251044,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918139433.632, "dur": 0.740, + "args": { + "External id": 251045,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139439.838, "dur": 94.038, + "args": { + "External id": 251046,"Record function id": 0, "Sequence number": 2987615, "Fwd thread id": 1, "Ev Idx": 1189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139440.613, "dur": 87.955, + "args": { + "External id": 251047,"Sequence number": 2987615, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1190 + } + }, + { + "ph": "f", "id": 129, "pid": 4183438, "tid": 31367, "ts": 667918139440.613, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918139444.378, "dur": 3.293, + "args": { + "External id": 251048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918139444.905, "dur": 2.334, + "args": { + "External id": 251049,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[65536, 1024], [], []], "Ev Idx": 1192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918139446.344, "dur": 0.770, + "args": { + "External id": 251050,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 1193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918139448.380, "dur": 34.514, + "args": { + "External id": 251051,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024]], "Ev Idx": 1194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918139484.002, "dur": 3.573, + "args": { + "External id": 251052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918139484.690, "dur": 2.340, + "args": { + "External id": 251053,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918139486.206, "dur": 0.707, + "args": { + "External id": 251054,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918139489.878, "dur": 5.952, + "args": { + "External id": 251055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918139491.254, "dur": 4.122, + "args": { + "External id": 251056,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918139492.979, "dur": 2.329, + "args": { + "External id": 251057,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918139496.364, "dur": 31.448, + "args": { + "External id": 251058,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 1201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139538.266, "dur": 34.929, + "args": { + "External id": 251059,"Record function id": 0, "Sequence number": 2987614, "Fwd thread id": 1, "Ev Idx": 1202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139539.292, "dur": 5.868, + "args": { + "External id": 251060,"Sequence number": 2987614, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1203 + } + }, + { + "ph": "f", "id": 130, "pid": 4183438, "tid": 31367, "ts": 667918139539.292, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918139541.207, "dur": 3.811, + "args": { + "External id": 251061,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918139543.824, "dur": 1.041, + "args": { + "External id": 251062,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 31367, + "ts": 667918139548.231, "dur": 22.715, + "args": { + "External id": 251063,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 1206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139576.628, "dur": 7.955, + "args": { + "External id": 251064,"Record function id": 0, "Sequence number": 2987613, "Fwd thread id": 1, "Ev Idx": 1207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139577.749, "dur": 5.004, + "args": { + "External id": 251065,"Sequence number": 2987613, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1208 + } + }, + { + "ph": "f", "id": 131, "pid": 4183438, "tid": 31367, "ts": 667918139577.749, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918139578.434, "dur": 4.159, + "args": { + "External id": 251066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918139579.396, "dur": 2.677, + "args": { + "External id": 251067,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918139581.349, "dur": 0.626, + "args": { + "External id": 251068,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918139588.568, "dur": 6.782, + "args": { + "External id": 251069,"Record function id": 0, "Ev Idx": 1212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918139590.272, "dur": 4.605, + "args": { + "External id": 251070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918139591.131, "dur": 3.434, + "args": { + "External id": 251071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918139593.368, "dur": 1.096, + "args": { + "External id": 251072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139598.564, "dur": 8.336, + "args": { + "External id": 251073,"Record function id": 0, "Sequence number": 2987612, "Fwd thread id": 1, "Ev Idx": 1216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139599.554, "dur": 4.774, + "args": { + "External id": 251074,"Sequence number": 2987612, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1217 + } + }, + { + "ph": "f", "id": 132, "pid": 4183438, "tid": 31367, "ts": 667918139599.554, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918139600.617, "dur": 3.557, + "args": { + "External id": 251075,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918139603.201, "dur": 0.880, + "args": { + "External id": 251076,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139610.022, "dur": 143.624, + "args": { + "External id": 251077,"Record function id": 0, "Sequence number": 2987611, "Fwd thread id": 1, "Ev Idx": 1220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139610.671, "dur": 134.193, + "args": { + "External id": 251078,"Sequence number": 2987611, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1221 + } + }, + { + "ph": "f", "id": 133, "pid": 4183438, "tid": 31367, "ts": 667918139610.671, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918139614.909, "dur": 2.831, + "args": { + "External id": 251079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918139615.307, "dur": 1.962, + "args": { + "External id": 251080,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[65536, 1024], [], []], "Ev Idx": 1223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918139616.764, "dur": 0.397, + "args": { + "External id": 251081,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 1224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918139618.416, "dur": 35.116, + "args": { + "External id": 251082,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024]], "Ev Idx": 1225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918139691.968, "dur": 8.383, + "args": { + "External id": 251083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918139693.076, "dur": 6.384, + "args": { + "External id": 251084,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918139696.921, "dur": 2.191, + "args": { + "External id": 251085,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918139701.544, "dur": 3.834, + "args": { + "External id": 251086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918139702.940, "dur": 2.016, + "args": { + "External id": 251087,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918139704.301, "dur": 0.589, + "args": { + "External id": 251088,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918139706.072, "dur": 37.996, + "args": { + "External id": 251089,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 1232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139759.912, "dur": 28.002, + "args": { + "External id": 251090,"Record function id": 0, "Sequence number": 2987610, "Fwd thread id": 1, "Ev Idx": 1233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139761.230, "dur": 5.698, + "args": { + "External id": 251091,"Sequence number": 2987610, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1234 + } + }, + { + "ph": "f", "id": 134, "pid": 4183438, "tid": 31367, "ts": 667918139761.230, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918139762.882, "dur": 3.915, + "args": { + "External id": 251092,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918139765.450, "dur": 1.238, + "args": { + "External id": 251093,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 31367, + "ts": 667918139769.754, "dur": 16.001, + "args": { + "External id": 251094,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 1237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139791.615, "dur": 7.625, + "args": { + "External id": 251095,"Record function id": 0, "Sequence number": 2987609, "Fwd thread id": 1, "Ev Idx": 1238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918139792.448, "dur": 4.902, + "args": { + "External id": 251096,"Sequence number": 2987609, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1239 + } + }, + { + "ph": "f", "id": 135, "pid": 4183438, "tid": 31367, "ts": 667918139792.448, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918139793.391, "dur": 3.797, + "args": { + "External id": 251097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918139794.207, "dur": 2.461, + "args": { + "External id": 251098,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918139796.028, "dur": 0.490, + "args": { + "External id": 251099,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918139803.414, "dur": 6.028, + "args": { + "External id": 251100,"Record function id": 0, "Ev Idx": 1243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918139805.175, "dur": 3.829, + "args": { + "External id": 251101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918139806.125, "dur": 2.391, + "args": { + "External id": 251102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918139806.658, "dur": 1.749, + "args": { + "External id": 251103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918139815.368, "dur": 338.763, + "args": { + "External id": 251104,"Record function id": 0, "Sequence number": 2987608, "Fwd thread id": 1, "Ev Idx": 1247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918139816.901, "dur": 311.177, + "args": { + "External id": 251105,"Sequence number": 2987608, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1248 + } + }, + { + "ph": "f", "id": 136, "pid": 4183438, "tid": 31367, "ts": 667918139816.901, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918139851.393, "dur": 2.116, + "args": { + "External id": 251106,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918139852.211, "dur": 1.131, + "args": { + "External id": 251107,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918139868.320, "dur": 6.183, + "args": { + "External id": 251108,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918139884.039, "dur": 3.118, + "args": { + "External id": 251109,"Record function id": 0, "Concrete Inputs": ["[132, 1024]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918140029.566, "dur": 1.972, + "args": { + "External id": 251110,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 1024]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[132, 1024], []], "Ev Idx": 1253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 31367, + "ts": 667918140035.943, "dur": 31.517, + "args": { + "External id": 251111,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[135168, 1024, 1], [], [], []], "Input Dims": [[1, 132, 1024], [], [], []], "Ev Idx": 1254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140044.772, "dur": 0.871, + "args": { + "External id": 251112,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 1024]", "[1024, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1, 1024], [], [], []], "Ev Idx": 1255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918140073.064, "dur": 30.088, + "args": { + "External id": 251113,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[1, 1024], [], [], [], [], []], "Ev Idx": 1256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918140076.646, "dur": 26.216, + "args": { + "External id": 251114,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], [], []], "Input Dims": [[1, 1024], [], [], [], [], [], []], "Ev Idx": 1257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140080.856, "dur": 4.040, + "args": { + "External id": 251115,"Record function id": 0, "Concrete Inputs": ["[1, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918140086.462, "dur": 15.922, + "args": { + "External id": 251116,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[1, 1024], [1, 1024], []], "Ev Idx": 1259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 31367, + "ts": 667918140107.299, "dur": 2.703, + "args": { + "External id": 251117,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1]], "Input Dims": [[1, 1024], [1024]], "Ev Idx": 1260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918140108.586, "dur": 1.271, + "args": { + "External id": 251118,"Record function id": 0, "Concrete Inputs": ["", "[1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[1, 1024], []], "Ev Idx": 1261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918140116.796, "dur": 3.885, + "args": { + "External id": 251119,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918140119.796, "dur": 0.771, + "args": { + "External id": 251120,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 31367, + "ts": 667918140136.559, "dur": 13.713, + "args": { + "External id": 251121,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 1264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918140163.941, "dur": 7.789, + "args": { + "External id": 251122,"Record function id": 0, "Ev Idx": 1265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918140166.245, "dur": 4.844, + "args": { + "External id": 251123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918140167.725, "dur": 2.478, + "args": { + "External id": 251124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918140168.654, "dur": 1.423, + "args": { + "External id": 251125,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140175.664, "dur": 4.186, + "args": { + "External id": 251126,"Record function id": 0, "Sequence number": 2987607, "Fwd thread id": 1, "Ev Idx": 1269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140176.746, "dur": 0.876, + "args": { + "External id": 251127,"Sequence number": 2987607, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1270 + } + }, + { + "ph": "f", "id": 137, "pid": 4183438, "tid": 31367, "ts": 667918140176.746, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918140183.525, "dur": 413.786, + "args": { + "External id": 251128,"Record function id": 0, "Sequence number": 2987606, "Fwd thread id": 1, "Ev Idx": 1271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918140184.892, "dur": 400.729, + "args": { + "External id": 251129,"Sequence number": 2987606, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1272 + } + }, + { + "ph": "f", "id": 138, "pid": 4183438, "tid": 31367, "ts": 667918140184.892, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918140236.958, "dur": 10.173, + "args": { + "External id": 251130,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 4183438, "tid": 31367, + "ts": 667918140242.567, "dur": 4.263, + "args": { + "External id": 251131,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]", "[4096, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[16777216, 4096, 1], [], []], "Input Dims": [[16, 4096, 1024], [], []], "Ev Idx": 1274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918140250.197, "dur": 5.283, + "args": { + "External id": 251132,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 1275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918140251.750, "dur": 2.953, + "args": { + "External id": 251133,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2816, 1], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 1276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140253.804, "dur": 0.696, + "args": { + "External id": 251134,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 1277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 31367, + "ts": 667918140259.224, "dur": 88.578, + "args": { + "External id": 251135,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [1, 2816], []], "Input Dims": [[65536, 1024], [2816, 1024], []], "Ev Idx": 1278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918140261.714, "dur": 4.953, + "args": { + "External id": 251136,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2816]], "Input Dims": [[2816, 1024]], "Ev Idx": 1279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918140262.251, "dur": 3.947, + "args": { + "External id": 251137,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2816], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 1280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140263.671, "dur": 2.430, + "args": { + "External id": 251138,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[2816, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2816], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 1281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 31367, + "ts": 667918140268.042, "dur": 79.199, + "args": { + "External id": 251139,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816]], "Ev Idx": 1282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918140269.572, "dur": 76.881, + "args": { + "External id": 251140,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816]], "Ev Idx": 1283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 31367, + "ts": 667918140352.060, "dur": 3.315, + "args": { + "External id": 251141,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [11534336, 2816, 1]], "Input Dims": [[65536, 2816], [16, 4096, 2816]], "Ev Idx": 1284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918140353.622, "dur": 1.603, + "args": { + "External id": 251142,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1], []], "Input Dims": [[65536, 2816], []], "Ev Idx": 1285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918140389.728, "dur": 5.849, + "args": { + "External id": 251143,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2816]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918140396.859, "dur": 1.747, + "args": { + "External id": 251144,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2816]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918140399.734, "dur": 1.832, + "args": { + "External id": 251145,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2816]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918140433.876, "dur": 2.488, + "args": { + "External id": 251146,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 1289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918140434.947, "dur": 1.271, + "args": { + "External id": 251147,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 1290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 4183438, "tid": 31367, + "ts": 667918140459.026, "dur": 108.590, + "args": { + "External id": 251148,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[4096, 1], [2816, 1]], []], "Input Dims": [[], [[65536, 1024], [65536, 2816]], []], "Ev Idx": 1291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 31367, + "ts": 667918140463.882, "dur": 6.828, + "args": { + "External id": 251149,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140467.413, "dur": 2.475, + "args": { + "External id": 251150,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024, 1]", "[4096, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 1293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918140472.536, "dur": 6.182, + "args": { + "External id": 251151,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1, 1], []], "Input Dims": [[65536, 1024, 1], []], "Ev Idx": 1294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140475.872, "dur": 2.131, + "args": { + "External id": 251152,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1, 65536]", "[1, 1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 1], [], [], []], "Input Dims": [[65536, 1024, 1], [], [], []], "Ev Idx": 1295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 31367, + "ts": 667918140480.135, "dur": 2.999, + "args": { + "External id": 251153,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], []], "Input Dims": [[65536, 2816], []], "Ev Idx": 1296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140482.240, "dur": 0.529, + "args": { + "External id": 251154,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816, 1]", "[2816, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[65536, 2816], [], [], []], "Ev Idx": 1297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918140484.289, "dur": 2.700, + "args": { + "External id": 251155,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1, 1], []], "Input Dims": [[65536, 2816, 1], []], "Ev Idx": 1298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140486.021, "dur": 0.581, + "args": { + "External id": 251156,"Record function id": 0, "Concrete Inputs": ["", "[1, 2816, 65536]", "[1, 1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1, 1], [], [], []], "Input Dims": [[65536, 2816, 1], [], [], []], "Ev Idx": 1299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918140490.816, "dur": 3.065, + "args": { + "External id": 251157,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 4096], []], "Input Dims": [[1024, 1, 65536], []], "Ev Idx": 1300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140492.952, "dur": 0.620, + "args": { + "External id": 251158,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536, 1]", "[1, 4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 4096], [], [], []], "Input Dims": [[1024, 1, 65536], [], [], []], "Ev Idx": 1301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918140495.106, "dur": 5.323, + "args": { + "External id": 251159,"Record function id": 0, "Concrete Inputs": ["", "[1, 1024, 65536]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 4096, 1], []], "Input Dims": [[1024, 65536, 1], []], "Ev Idx": 1302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 4183438, "tid": 31367, + "ts": 667918140498.648, "dur": 1.546, + "args": { + "External id": 251160,"Record function id": 0, "Concrete Inputs": ["", "[1, 1024, 65536]", "[1024, 1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 4096, 1], [], []], "Input Dims": [[1024, 65536, 1], [], []], "Ev Idx": 1303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918140503.139, "dur": 2.795, + "args": { + "External id": 251161,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 2816], []], "Input Dims": [[1, 2816, 65536], []], "Ev Idx": 1304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140505.213, "dur": 0.378, + "args": { + "External id": 251162,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816, 1]", "[2816, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 2816], [], [], []], "Input Dims": [[1, 2816, 65536], [], [], []], "Ev Idx": 1305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918140506.855, "dur": 4.267, + "args": { + "External id": 251163,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1, 1], []], "Input Dims": [[65536, 2816, 1], []], "Ev Idx": 1306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918140508.574, "dur": 2.444, + "args": { + "External id": 251164,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1, 1], []], "Input Dims": [[65536, 2816, 1], []], "Ev Idx": 1307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918140512.453, "dur": 42.595, + "args": { + "External id": 251165,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1, 4096], [184549376, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816]], "Ev Idx": 1308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918140556.940, "dur": 2.873, + "args": { + "External id": 251166,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2883584, 2816, 1], []], "Input Dims": [[1, 1024, 2816], []], "Ev Idx": 1309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 4183438, "tid": 31367, + "ts": 667918140560.884, "dur": 2.798, + "args": { + "External id": 251167,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 2816, 1], []], "Input Dims": [[1024, 1, 2816], []], "Ev Idx": 1310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140562.846, "dur": 0.330, + "args": { + "External id": 251168,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816, 1]", "[2816, 1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 2816, 1], [], [], []], "Input Dims": [[1024, 1, 2816], [], [], []], "Ev Idx": 1311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918140565.774, "dur": 0.840, + "args": { + "External id": 251169,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1, 2816], []], "Input Dims": [[1024, 2816, 1], []], "Ev Idx": 1312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918140608.032, "dur": 9.407, + "args": { + "External id": 251170,"Record function id": 0, "Ev Idx": 1313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918140610.820, "dur": 5.887, + "args": { + "External id": 251171,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 1314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918140612.951, "dur": 2.961, + "args": { + "External id": 251172,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 1315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918140613.939, "dur": 1.849, + "args": { + "External id": 251173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 1316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140621.266, "dur": 7.087, + "args": { + "External id": 251174,"Record function id": 0, "Sequence number": 2987605, "Fwd thread id": 1, "Ev Idx": 1317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140622.421, "dur": 3.801, + "args": { + "External id": 251175,"Sequence number": 2987605, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[11534336, 2816, 1]], "Input Dims": [[16, 4096, 2816]], "Ev Idx": 1318 + } + }, + { + "ph": "f", "id": 139, "pid": 4183438, "tid": 31367, "ts": 667918140622.421, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918140624.101, "dur": 1.905, + "args": { + "External id": 251176,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 1319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918140625.039, "dur": 0.818, + "args": { + "External id": 251177,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 1320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140631.697, "dur": 158.478, + "args": { + "External id": 251178,"Record function id": 0, "Sequence number": 2987604, "Fwd thread id": 1, "Ev Idx": 1321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140632.499, "dur": 149.789, + "args": { + "External id": 251179,"Sequence number": 2987604, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[65536, 2816]], "Ev Idx": 1322 + } + }, + { + "ph": "f", "id": 140, "pid": 4183438, "tid": 31367, "ts": 667918140632.499, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918140637.482, "dur": 3.903, + "args": { + "External id": 251180,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[65536, 2816]], "Ev Idx": 1323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918140638.650, "dur": 2.122, + "args": { + "External id": 251181,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2816, 1], [], []], "Input Dims": [[65536, 2816], [], []], "Ev Idx": 1324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140640.089, "dur": 0.513, + "args": { + "External id": 251182,"Record function id": 0, "Concrete Inputs": ["", "[2816, 65536]", "[1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[65536, 2816], [], [], []], "Ev Idx": 1325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918140642.527, "dur": 79.133, + "args": { + "External id": 251183,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024]], "Ev Idx": 1326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918140724.606, "dur": 7.409, + "args": { + "External id": 251184,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918140725.849, "dur": 5.277, + "args": { + "External id": 251185,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 1328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140730.046, "dur": 0.923, + "args": { + "External id": 251186,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 1329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918140733.904, "dur": 3.937, + "args": { + "External id": 251187,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 1330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918140735.129, "dur": 2.051, + "args": { + "External id": 251188,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 1331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140736.656, "dur": 0.456, + "args": { + "External id": 251189,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 1332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918140738.547, "dur": 42.902, + "args": { + "External id": 251190,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024]], "Ev Idx": 1333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140796.468, "dur": 8.474, + "args": { + "External id": 251191,"Record function id": 0, "Sequence number": 2987603, "Fwd thread id": 1, "Ev Idx": 1334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140797.380, "dur": 6.252, + "args": { + "External id": 251192,"Sequence number": 2987603, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1335 + } + }, + { + "ph": "f", "id": 141, "pid": 4183438, "tid": 31367, "ts": 667918140797.380, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918140799.305, "dur": 4.176, + "args": { + "External id": 251193,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918140801.952, "dur": 1.397, + "args": { + "External id": 251194,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140808.714, "dur": 7.453, + "args": { + "External id": 251195,"Record function id": 0, "Sequence number": 2987602, "Fwd thread id": 1, "Ev Idx": 1338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140809.835, "dur": 4.492, + "args": { + "External id": 251196,"Sequence number": 2987602, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 1339 + } + }, + { + "ph": "f", "id": 142, "pid": 4183438, "tid": 31367, "ts": 667918140809.835, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918140810.703, "dur": 3.404, + "args": { + "External id": 251197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 1340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918140811.311, "dur": 2.269, + "args": { + "External id": 251198,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 1341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140813.134, "dur": 0.361, + "args": { + "External id": 251199,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 1342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918140820.366, "dur": 6.113, + "args": { + "External id": 251200,"Record function id": 0, "Ev Idx": 1343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918140822.165, "dur": 3.860, + "args": { + "External id": 251201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918140823.544, "dur": 2.188, + "args": { + "External id": 251202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918140824.239, "dur": 1.403, + "args": { + "External id": 251203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140829.773, "dur": 7.495, + "args": { + "External id": 251204,"Record function id": 0, "Sequence number": 2987601, "Fwd thread id": 1, "Ev Idx": 1347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140831.153, "dur": 4.626, + "args": { + "External id": 251205,"Sequence number": 2987601, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[11534336, 2816, 1]], "Input Dims": [[16, 4096, 2816]], "Ev Idx": 1348 + } + }, + { + "ph": "f", "id": 143, "pid": 4183438, "tid": 31367, "ts": 667918140831.153, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918140834.026, "dur": 1.600, + "args": { + "External id": 251206,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 1349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918140834.701, "dur": 0.812, + "args": { + "External id": 251207,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 1350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140840.210, "dur": 87.898, + "args": { + "External id": 251208,"Record function id": 0, "Sequence number": 2987600, "Fwd thread id": 1, "Ev Idx": 1351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140841.049, "dur": 80.610, + "args": { + "External id": 251209,"Sequence number": 2987600, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[65536, 2816]], "Ev Idx": 1352 + } + }, + { + "ph": "f", "id": 144, "pid": 4183438, "tid": 31367, "ts": 667918140841.049, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918140842.873, "dur": 5.307, + "args": { + "External id": 251210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[65536, 2816]], "Ev Idx": 1353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918140843.518, "dur": 4.196, + "args": { + "External id": 251211,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2816, 1], [], []], "Input Dims": [[65536, 2816], [], []], "Ev Idx": 1354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140847.023, "dur": 0.555, + "args": { + "External id": 251212,"Record function id": 0, "Concrete Inputs": ["", "[2816, 65536]", "[1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[65536, 2816], [], [], []], "Ev Idx": 1355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918140848.839, "dur": 28.254, + "args": { + "External id": 251213,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024]], "Ev Idx": 1356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918140878.006, "dur": 4.193, + "args": { + "External id": 251214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918140878.929, "dur": 2.700, + "args": { + "External id": 251215,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 1358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140880.650, "dur": 0.812, + "args": { + "External id": 251216,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 1359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918140883.241, "dur": 6.198, + "args": { + "External id": 251217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 1360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918140884.274, "dur": 4.752, + "args": { + "External id": 251218,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 1361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140887.177, "dur": 1.758, + "args": { + "External id": 251219,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 1362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918140889.862, "dur": 31.006, + "args": { + "External id": 251220,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024]], "Ev Idx": 1363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140934.028, "dur": 31.222, + "args": { + "External id": 251221,"Record function id": 0, "Sequence number": 2987599, "Fwd thread id": 1, "Ev Idx": 1364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140934.974, "dur": 4.160, + "args": { + "External id": 251222,"Sequence number": 2987599, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1365 + } + }, + { + "ph": "f", "id": 145, "pid": 4183438, "tid": 31367, "ts": 667918140934.974, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918140936.303, "dur": 2.682, + "args": { + "External id": 251223,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918140937.608, "dur": 1.231, + "args": { + "External id": 251224,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 31367, + "ts": 667918140941.878, "dur": 20.570, + "args": { + "External id": 251225,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 1368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140968.839, "dur": 9.251, + "args": { + "External id": 251226,"Record function id": 0, "Sequence number": 2987598, "Fwd thread id": 1, "Ev Idx": 1369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918140969.984, "dur": 5.772, + "args": { + "External id": 251227,"Sequence number": 2987598, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 1370 + } + }, + { + "ph": "f", "id": 146, "pid": 4183438, "tid": 31367, "ts": 667918140969.984, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918140970.843, "dur": 4.700, + "args": { + "External id": 251228,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 2816]], "Ev Idx": 1371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918140971.631, "dur": 3.406, + "args": { + "External id": 251229,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 1372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918140974.337, "dur": 0.571, + "args": { + "External id": 251230,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 1373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918140982.021, "dur": 4.921, + "args": { + "External id": 251231,"Record function id": 0, "Ev Idx": 1374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918140983.599, "dur": 2.933, + "args": { + "External id": 251232,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918140984.472, "dur": 1.590, + "args": { + "External id": 251233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918140984.922, "dur": 1.055, + "args": { + "External id": 251234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918140990.975, "dur": 448.902, + "args": { + "External id": 251235,"Record function id": 0, "Sequence number": 2987597, "Fwd thread id": 1, "Ev Idx": 1378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918140995.538, "dur": 405.418, + "args": { + "External id": 251236,"Sequence number": 2987597, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [16777216, 4096, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 1379 + } + }, + { + "ph": "f", "id": 147, "pid": 4183438, "tid": 31367, "ts": 667918140995.538, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 4183438, "tid": 31367, + "ts": 667918141019.590, "dur": 33.221, + "args": { + "External id": 251237,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 31367, + "ts": 667918141020.924, "dur": 31.684, + "args": { + "External id": 251238,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918141023.775, "dur": 6.338, + "args": { + "External id": 251239,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], [], [], []], "Ev Idx": 1382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918141026.398, "dur": 3.160, + "args": { + "External id": 251240,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 1024]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918141031.326, "dur": 20.785, + "args": { + "External id": 251241,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 1384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918141066.153, "dur": 2.490, + "args": { + "External id": 251242,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918141067.050, "dur": 1.427, + "args": { + "External id": 251243,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918141072.594, "dur": 3.782, + "args": { + "External id": 251244,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918141073.531, "dur": 2.742, + "args": { + "External id": 251245,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918141090.364, "dur": 3.023, + "args": { + "External id": 251246,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918141104.560, "dur": 3.580, + "args": { + "External id": 251247,"Record function id": 0, "Concrete Inputs": ["[132, 1024]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918141288.602, "dur": 4.400, + "args": { + "External id": 251248,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 1024]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[132, 1024], []], "Ev Idx": 1391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 31367, + "ts": 667918141298.051, "dur": 34.238, + "args": { + "External id": 251249,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[135168, 1024, 1], [], [], []], "Input Dims": [[1, 132, 1024], [], [], []], "Ev Idx": 1392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918141307.885, "dur": 0.916, + "args": { + "External id": 251250,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 1024]", "[1024, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1, 1024], [], [], []], "Ev Idx": 1393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918141337.620, "dur": 31.090, + "args": { + "External id": 251251,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[1, 1024], [], [], [], [], []], "Ev Idx": 1394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918141339.401, "dur": 29.095, + "args": { + "External id": 251252,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], [], []], "Input Dims": [[1, 1024], [], [], [], [], [], []], "Ev Idx": 1395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918141343.998, "dur": 3.688, + "args": { + "External id": 251253,"Record function id": 0, "Concrete Inputs": ["[1, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918141351.268, "dur": 16.625, + "args": { + "External id": 251254,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[1, 1024], [1, 1024], []], "Ev Idx": 1397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 31367, + "ts": 667918141373.197, "dur": 2.104, + "args": { + "External id": 251255,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1]], "Input Dims": [[1, 1024], [1024]], "Ev Idx": 1398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918141374.237, "dur": 0.959, + "args": { + "External id": 251256,"Record function id": 0, "Concrete Inputs": ["", "[1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[1, 1024], []], "Ev Idx": 1399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918141381.856, "dur": 2.627, + "args": { + "External id": 251257,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918141383.202, "dur": 1.182, + "args": { + "External id": 251258,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918141387.126, "dur": 4.200, + "args": { + "External id": 251259,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918141388.364, "dur": 2.866, + "args": { + "External id": 251260,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 31367, + "ts": 667918141416.840, "dur": 21.523, + "args": { + "External id": 251261,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 1404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918141451.685, "dur": 9.059, + "args": { + "External id": 251262,"Record function id": 0, "Ev Idx": 1405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918141454.046, "dur": 6.058, + "args": { + "External id": 251263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918141456.164, "dur": 2.876, + "args": { + "External id": 251264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918141457.156, "dur": 1.812, + "args": { + "External id": 251265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918141464.671, "dur": 8.470, + "args": { + "External id": 251266,"Record function id": 0, "Sequence number": 2987596, "Fwd thread id": 1, "Ev Idx": 1409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918141465.977, "dur": 4.490, + "args": { + "External id": 251267,"Sequence number": 2987596, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1410 + } + }, + { + "ph": "f", "id": 148, "pid": 4183438, "tid": 31367, "ts": 667918141465.977, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918141467.662, "dur": 2.617, + "args": { + "External id": 251268,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918141468.932, "dur": 1.207, + "args": { + "External id": 251269,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918141479.302, "dur": 132.330, + "args": { + "External id": 251270,"Record function id": 0, "Sequence number": 2987595, "Fwd thread id": 1, "Ev Idx": 1413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918141480.200, "dur": 125.314, + "args": { + "External id": 251271,"Sequence number": 2987595, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1414 + } + }, + { + "ph": "f", "id": 149, "pid": 4183438, "tid": 31367, "ts": 667918141480.200, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918141483.453, "dur": 5.462, + "args": { + "External id": 251272,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918141484.890, "dur": 3.370, + "args": { + "External id": 251273,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[65536, 1024], [], []], "Ev Idx": 1416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918141487.152, "dur": 0.915, + "args": { + "External id": 251274,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 1417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918141490.145, "dur": 66.090, + "args": { + "External id": 251275,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024]], "Ev Idx": 1418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918141557.247, "dur": 7.421, + "args": { + "External id": 251276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918141559.806, "dur": 4.118, + "args": { + "External id": 251277,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918141561.572, "dur": 2.179, + "args": { + "External id": 251278,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918141566.003, "dur": 4.020, + "args": { + "External id": 251279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918141567.160, "dur": 2.363, + "args": { + "External id": 251280,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918141568.964, "dur": 0.425, + "args": { + "External id": 251281,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918141570.691, "dur": 34.052, + "args": { + "External id": 251282,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 1425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918141616.416, "dur": 11.442, + "args": { + "External id": 251283,"Record function id": 0, "Sequence number": 2987594, "Fwd thread id": 1, "Ev Idx": 1426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918141617.707, "dur": 7.851, + "args": { + "External id": 251284,"Sequence number": 2987594, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1427 + } + }, + { + "ph": "f", "id": 150, "pid": 4183438, "tid": 31367, "ts": 667918141617.707, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918141621.493, "dur": 3.918, + "args": { + "External id": 251285,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918141622.313, "dur": 2.927, + "args": { + "External id": 251286,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918141631.479, "dur": 7.483, + "args": { + "External id": 251287,"Record function id": 0, "Sequence number": 2987593, "Fwd thread id": 1, "Ev Idx": 1430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918141632.297, "dur": 4.833, + "args": { + "External id": 251288,"Sequence number": 2987593, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1431 + } + }, + { + "ph": "f", "id": 151, "pid": 4183438, "tid": 31367, "ts": 667918141632.297, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918141633.339, "dur": 3.575, + "args": { + "External id": 251289,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918141634.122, "dur": 2.258, + "args": { + "External id": 251290,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918141635.626, "dur": 0.659, + "args": { + "External id": 251291,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918141642.611, "dur": 7.537, + "args": { + "External id": 251292,"Record function id": 0, "Ev Idx": 1435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918141644.720, "dur": 4.978, + "args": { + "External id": 251293,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918141645.739, "dur": 3.715, + "args": { + "External id": 251294,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918141648.237, "dur": 1.124, + "args": { + "External id": 251295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918141653.222, "dur": 45.570, + "args": { + "External id": 251296,"Record function id": 0, "Sequence number": 2987592, "Fwd thread id": 1, "Ev Idx": 1439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918141690.442, "dur": 5.781, + "args": { + "External id": 251297,"Sequence number": 2987592, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1440 + } + }, + { + "ph": "f", "id": 152, "pid": 4183438, "tid": 31367, "ts": 667918141690.442, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918141692.735, "dur": 3.325, + "args": { + "External id": 251298,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918141693.655, "dur": 2.136, + "args": { + "External id": 251299,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 4183438, "tid": 31367, + "ts": 667918141704.794, "dur": 278.574, + "args": { + "External id": 251300,"Record function id": 0, "Sequence number": 2987591, "Fwd thread id": 1, "Ev Idx": 1443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 4183438, "tid": 31367, + "ts": 667918141706.816, "dur": 260.707, + "args": { + "External id": 251301,"Sequence number": 2987591, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1444 + } + }, + { + "ph": "f", "id": 153, "pid": 4183438, "tid": 31367, "ts": 667918141706.816, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918141723.037, "dur": 8.109, + "args": { + "External id": 251302,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 1445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918141725.853, "dur": 4.903, + "args": { + "External id": 251303,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918141733.408, "dur": 3.430, + "args": { + "External id": 251304,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 1447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918141735.068, "dur": 1.602, + "args": { + "External id": 251305,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918141738.332, "dur": 3.826, + "args": { + "External id": 251306,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 1449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918141740.439, "dur": 1.510, + "args": { + "External id": 251307,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918141765.265, "dur": 177.377, + "args": { + "External id": 251308,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918141846.731, "dur": 4.397, + "args": { + "External id": 251309,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918141852.918, "dur": 3.161, + "args": { + "External id": 251310,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 31367, + "ts": 667918141955.004, "dur": 3.578, + "args": { + "External id": 251311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 31367, + "ts": 667918141961.577, "dur": 0.869, + "args": { + "External id": 251312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 31367, + "ts": 667918141964.265, "dur": 0.567, + "args": { + "External id": 251313,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918141990.358, "dur": 247.775, + "args": { + "External id": 251314,"Record function id": 0, "Sequence number": 2987590, "Fwd thread id": 1, "Ev Idx": 1457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918141992.174, "dur": 237.629, + "args": { + "External id": 251315,"Sequence number": 2987590, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1458 + } + }, + { + "ph": "f", "id": 154, "pid": 4183438, "tid": 31367, "ts": 667918141992.174, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 31367, + "ts": 667918142011.592, "dur": 48.439, + "args": { + "External id": 251316,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 1459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918142014.667, "dur": 2.696, + "args": { + "External id": 251317,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918142018.697, "dur": 40.775, + "args": { + "External id": 251318,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], []], "Ev Idx": 1461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918142070.040, "dur": 4.196, + "args": { + "External id": 251319,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 1462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918142071.731, "dur": 2.178, + "args": { + "External id": 251320,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918142247.549, "dur": 163.766, + "args": { + "External id": 251321,"Record function id": 0, "Sequence number": 2987589, "Fwd thread id": 1, "Ev Idx": 1464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918142249.548, "dur": 155.485, + "args": { + "External id": 251322,"Sequence number": 2987589, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1465 + } + }, + { + "ph": "f", "id": 155, "pid": 4183438, "tid": 31367, "ts": 667918142249.548, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 31367, + "ts": 667918142262.693, "dur": 39.066, + "args": { + "External id": 251323,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 1466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918142265.830, "dur": 3.654, + "args": { + "External id": 251324,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918142270.465, "dur": 30.778, + "args": { + "External id": 251325,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], []], "Ev Idx": 1468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 31367, + "ts": 667918142309.546, "dur": 4.274, + "args": { + "External id": 251326,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 1469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918142311.581, "dur": 1.901, + "args": { + "External id": 251327,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142417.047, "dur": 17.267, + "args": { + "External id": 251328,"Record function id": 0, "Sequence number": 2987588, "Fwd thread id": 1, "Ev Idx": 1471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142418.433, "dur": 13.146, + "args": { + "External id": 251329,"Sequence number": 2987588, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1472 + } + }, + { + "ph": "f", "id": 156, "pid": 4183438, "tid": 31367, "ts": 667918142418.433, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918142422.981, "dur": 8.316, + "args": { + "External id": 251330,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 1473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918142424.851, "dur": 6.235, + "args": { + "External id": 251331,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 1474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142437.779, "dur": 6.866, + "args": { + "External id": 251332,"Record function id": 0, "Sequence number": 2987587, "Fwd thread id": 1, "Ev Idx": 1475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142438.715, "dur": 3.815, + "args": { + "External id": 251333,"Sequence number": 2987587, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1476 + } + }, + { + "ph": "f", "id": 157, "pid": 4183438, "tid": 31367, "ts": 667918142438.715, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918142440.011, "dur": 2.361, + "args": { + "External id": 251334,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 1477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918142441.213, "dur": 0.982, + "args": { + "External id": 251335,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 1478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142447.595, "dur": 5.929, + "args": { + "External id": 251336,"Record function id": 0, "Sequence number": 2987586, "Fwd thread id": 1, "Ev Idx": 1479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142448.530, "dur": 3.175, + "args": { + "External id": 251337,"Sequence number": 2987586, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 1480 + } + }, + { + "ph": "f", "id": 158, "pid": 4183438, "tid": 31367, "ts": 667918142448.530, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918142449.794, "dur": 1.757, + "args": { + "External id": 251338,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 1481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918142450.665, "dur": 0.782, + "args": { + "External id": 251339,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 1482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142456.817, "dur": 7.584, + "args": { + "External id": 251340,"Record function id": 0, "Sequence number": 2987585, "Fwd thread id": 1, "Ev Idx": 1483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142457.792, "dur": 4.631, + "args": { + "External id": 251341,"Sequence number": 2987585, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1484 + } + }, + { + "ph": "f", "id": 159, "pid": 4183438, "tid": 31367, "ts": 667918142457.792, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918142460.701, "dur": 1.563, + "args": { + "External id": 251342,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918142461.351, "dur": 0.820, + "args": { + "External id": 251343,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142467.611, "dur": 152.729, + "args": { + "External id": 251344,"Record function id": 0, "Sequence number": 2987584, "Fwd thread id": 1, "Ev Idx": 1487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142468.328, "dur": 145.847, + "args": { + "External id": 251345,"Sequence number": 2987584, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1488 + } + }, + { + "ph": "f", "id": 160, "pid": 4183438, "tid": 31367, "ts": 667918142468.328, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918142471.875, "dur": 9.116, + "args": { + "External id": 251346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918142473.863, "dur": 6.411, + "args": { + "External id": 251347,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[65536, 1024], [], []], "Ev Idx": 1490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918142478.299, "dur": 1.644, + "args": { + "External id": 251348,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 1491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918142482.315, "dur": 79.341, + "args": { + "External id": 251349,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024]], "Ev Idx": 1492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918142563.182, "dur": 5.128, + "args": { + "External id": 251350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918142564.005, "dur": 3.552, + "args": { + "External id": 251351,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918142565.964, "dur": 1.451, + "args": { + "External id": 251352,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918142569.774, "dur": 5.001, + "args": { + "External id": 251353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918142571.262, "dur": 3.042, + "args": { + "External id": 251354,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918142573.736, "dur": 0.504, + "args": { + "External id": 251355,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918142575.337, "dur": 38.117, + "args": { + "External id": 251356,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 1499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142625.162, "dur": 6.784, + "args": { + "External id": 251357,"Record function id": 0, "Sequence number": 2987583, "Fwd thread id": 1, "Ev Idx": 1500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142625.928, "dur": 4.696, + "args": { + "External id": 251358,"Sequence number": 2987583, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1501 + } + }, + { + "ph": "f", "id": 161, "pid": 4183438, "tid": 31367, "ts": 667918142625.928, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918142627.715, "dur": 2.767, + "args": { + "External id": 251359,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918142628.946, "dur": 1.389, + "args": { + "External id": 251360,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142635.396, "dur": 9.850, + "args": { + "External id": 251361,"Record function id": 0, "Sequence number": 2987582, "Fwd thread id": 1, "Ev Idx": 1504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142636.263, "dur": 7.001, + "args": { + "External id": 251362,"Sequence number": 2987582, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1505 + } + }, + { + "ph": "f", "id": 162, "pid": 4183438, "tid": 31367, "ts": 667918142636.263, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918142637.410, "dur": 5.651, + "args": { + "External id": 251363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918142639.979, "dur": 2.527, + "args": { + "External id": 251364,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918142641.885, "dur": 0.491, + "args": { + "External id": 251365,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918142651.014, "dur": 52.075, + "args": { + "External id": 251366,"Record function id": 0, "Ev Idx": 1509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918142652.803, "dur": 48.782, + "args": { + "External id": 251367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918142696.053, "dur": 4.889, + "args": { + "External id": 251368,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918142697.473, "dur": 3.202, + "args": { + "External id": 251369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142708.229, "dur": 7.161, + "args": { + "External id": 251370,"Record function id": 0, "Sequence number": 2987581, "Fwd thread id": 1, "Ev Idx": 1513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142710.014, "dur": 3.294, + "args": { + "External id": 251371,"Sequence number": 2987581, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1514 + } + }, + { + "ph": "f", "id": 163, "pid": 4183438, "tid": 31367, "ts": 667918142710.014, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918142711.170, "dur": 1.949, + "args": { + "External id": 251372,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918142711.714, "dur": 1.245, + "args": { + "External id": 251373,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142720.196, "dur": 103.966, + "args": { + "External id": 251374,"Record function id": 0, "Sequence number": 2987580, "Fwd thread id": 1, "Ev Idx": 1517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142720.941, "dur": 96.151, + "args": { + "External id": 251375,"Sequence number": 2987580, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1518 + } + }, + { + "ph": "f", "id": 164, "pid": 4183438, "tid": 31367, "ts": 667918142720.941, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918142723.367, "dur": 3.927, + "args": { + "External id": 251376,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918142724.106, "dur": 2.700, + "args": { + "External id": 251377,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[65536, 1024], [], []], "Ev Idx": 1520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918142725.794, "dur": 0.871, + "args": { + "External id": 251378,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 1521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918142728.042, "dur": 41.492, + "args": { + "External id": 251379,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024]], "Ev Idx": 1522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918142770.705, "dur": 5.627, + "args": { + "External id": 251380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918142771.459, "dur": 4.334, + "args": { + "External id": 251381,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918142774.709, "dur": 0.966, + "args": { + "External id": 251382,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918142777.364, "dur": 4.968, + "args": { + "External id": 251383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918142778.365, "dur": 3.524, + "args": { + "External id": 251384,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918142780.090, "dur": 1.712, + "args": { + "External id": 251385,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918142782.929, "dur": 33.466, + "args": { + "External id": 251386,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 1529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142829.858, "dur": 38.513, + "args": { + "External id": 251387,"Record function id": 0, "Sequence number": 2987579, "Fwd thread id": 1, "Ev Idx": 1530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142830.791, "dur": 8.826, + "args": { + "External id": 251388,"Sequence number": 2987579, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1531 + } + }, + { + "ph": "f", "id": 165, "pid": 4183438, "tid": 31367, "ts": 667918142830.791, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918142834.760, "dur": 4.682, + "args": { + "External id": 251389,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918142838.058, "dur": 1.294, + "args": { + "External id": 251390,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 31367, + "ts": 667918142842.802, "dur": 23.106, + "args": { + "External id": 251391,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 1534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142872.339, "dur": 24.670, + "args": { + "External id": 251392,"Record function id": 0, "Sequence number": 2987578, "Fwd thread id": 1, "Ev Idx": 1535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142889.162, "dur": 6.012, + "args": { + "External id": 251393,"Sequence number": 2987578, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1536 + } + }, + { + "ph": "f", "id": 166, "pid": 4183438, "tid": 31367, "ts": 667918142889.162, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918142889.858, "dur": 5.117, + "args": { + "External id": 251394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918142890.752, "dur": 3.650, + "args": { + "External id": 251395,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918142893.839, "dur": 0.416, + "args": { + "External id": 251396,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918142901.173, "dur": 5.182, + "args": { + "External id": 251397,"Record function id": 0, "Ev Idx": 1540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918142902.825, "dur": 3.021, + "args": { + "External id": 251398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918142903.961, "dur": 1.583, + "args": { + "External id": 251399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918142904.370, "dur": 1.061, + "args": { + "External id": 251400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142909.598, "dur": 6.175, + "args": { + "External id": 251401,"Record function id": 0, "Sequence number": 2987577, "Fwd thread id": 1, "Ev Idx": 1544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142911.028, "dur": 2.819, + "args": { + "External id": 251402,"Sequence number": 2987577, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1545 + } + }, + { + "ph": "f", "id": 167, "pid": 4183438, "tid": 31367, "ts": 667918142911.028, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918142911.967, "dur": 1.746, + "args": { + "External id": 251403,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918142912.648, "dur": 0.963, + "args": { + "External id": 251404,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142918.831, "dur": 92.187, + "args": { + "External id": 251405,"Record function id": 0, "Sequence number": 2987576, "Fwd thread id": 1, "Ev Idx": 1548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918142919.594, "dur": 84.527, + "args": { + "External id": 251406,"Sequence number": 2987576, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1549 + } + }, + { + "ph": "f", "id": 168, "pid": 4183438, "tid": 31367, "ts": 667918142919.594, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918142922.887, "dur": 2.627, + "args": { + "External id": 251407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918142923.311, "dur": 1.736, + "args": { + "External id": 251408,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[65536, 1024], [], []], "Ev Idx": 1551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918142924.487, "dur": 0.446, + "args": { + "External id": 251409,"Record function id": 0, "Concrete Inputs": ["", "[1024, 65536]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 1552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918142926.151, "dur": 36.955, + "args": { + "External id": 251410,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024]], "Ev Idx": 1553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918142964.267, "dur": 4.570, + "args": { + "External id": 251411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918142964.857, "dur": 3.409, + "args": { + "External id": 251412,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918142967.645, "dur": 0.512, + "args": { + "External id": 251413,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918142969.848, "dur": 3.177, + "args": { + "External id": 251414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918142971.023, "dur": 1.555, + "args": { + "External id": 251415,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918142972.243, "dur": 0.252, + "args": { + "External id": 251416,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918142973.606, "dur": 29.964, + "args": { + "External id": 251417,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 1560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918143015.161, "dur": 24.661, + "args": { + "External id": 251418,"Record function id": 0, "Sequence number": 2987575, "Fwd thread id": 1, "Ev Idx": 1561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918143015.986, "dur": 3.657, + "args": { + "External id": 251419,"Sequence number": 2987575, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 1562 + } + }, + { + "ph": "f", "id": 169, "pid": 4183438, "tid": 31367, "ts": 667918143015.986, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918143017.328, "dur": 2.172, + "args": { + "External id": 251420,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918143018.268, "dur": 1.126, + "args": { + "External id": 251421,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 31367, + "ts": 667918143021.750, "dur": 15.984, + "args": { + "External id": 251422,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 1565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918143043.292, "dur": 8.921, + "args": { + "External id": 251423,"Record function id": 0, "Sequence number": 2987574, "Fwd thread id": 1, "Ev Idx": 1566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 4183438, "tid": 31367, + "ts": 667918143046.233, "dur": 3.882, + "args": { + "External id": 251424,"Sequence number": 2987574, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1567 + } + }, + { + "ph": "f", "id": 170, "pid": 4183438, "tid": 31367, "ts": 667918143046.233, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 31367, + "ts": 667918143046.743, "dur": 3.191, + "args": { + "External id": 251425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 1024]], "Input Dims": [[1024, 1024]], "Ev Idx": 1568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 31367, + "ts": 667918143047.485, "dur": 1.962, + "args": { + "External id": 251426,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 1024], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 1569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143048.816, "dur": 0.436, + "args": { + "External id": 251427,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1024, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1024], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 1570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918143055.816, "dur": 4.537, + "args": { + "External id": 251428,"Record function id": 0, "Ev Idx": 1571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918143057.374, "dur": 2.518, + "args": { + "External id": 251429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918143058.103, "dur": 1.502, + "args": { + "External id": 251430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918143058.483, "dur": 1.015, + "args": { + "External id": 251431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918143064.560, "dur": 378.475, + "args": { + "External id": 251432,"Record function id": 0, "Sequence number": 2987573, "Fwd thread id": 1, "Ev Idx": 1575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918143065.942, "dur": 343.789, + "args": { + "External id": 251433,"Sequence number": 2987573, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1576 + } + }, + { + "ph": "f", "id": 171, "pid": 4183438, "tid": 31367, "ts": 667918143065.942, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918143099.742, "dur": 3.565, + "args": { + "External id": 251434,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918143101.961, "dur": 1.175, + "args": { + "External id": 251435,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 1578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918143118.155, "dur": 5.286, + "args": { + "External id": 251436,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918143132.475, "dur": 3.111, + "args": { + "External id": 251437,"Record function id": 0, "Concrete Inputs": ["[132, 1024]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918143304.387, "dur": 3.010, + "args": { + "External id": 251438,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 1024]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[132, 1024], []], "Ev Idx": 1581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 31367, + "ts": 667918143312.016, "dur": 37.669, + "args": { + "External id": 251439,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[135168, 1024, 1], [], [], []], "Input Dims": [[1, 132, 1024], [], [], []], "Ev Idx": 1582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143323.726, "dur": 0.862, + "args": { + "External id": 251440,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 1024]", "[1024, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1, 1024], [], [], []], "Ev Idx": 1583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918143355.681, "dur": 32.793, + "args": { + "External id": 251441,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[1, 1024], [], [], [], [], []], "Ev Idx": 1584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918143359.278, "dur": 28.870, + "args": { + "External id": 251442,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], [], []], "Input Dims": [[1, 1024], [], [], [], [], [], []], "Ev Idx": 1585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143363.813, "dur": 4.054, + "args": { + "External id": 251443,"Record function id": 0, "Concrete Inputs": ["[1, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918143369.358, "dur": 18.282, + "args": { + "External id": 251444,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[1, 1024], [1, 1024], []], "Ev Idx": 1587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 31367, + "ts": 667918143392.772, "dur": 2.416, + "args": { + "External id": 251445,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1]], "Input Dims": [[1, 1024], [1024]], "Ev Idx": 1588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918143393.902, "dur": 1.170, + "args": { + "External id": 251446,"Record function id": 0, "Concrete Inputs": ["", "[1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[1, 1024], []], "Ev Idx": 1589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 31367, + "ts": 667918143400.861, "dur": 2.054, + "args": { + "External id": 251447,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918143401.837, "dur": 0.938, + "args": { + "External id": 251448,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 1591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 31367, + "ts": 667918143423.360, "dur": 14.968, + "args": { + "External id": 251449,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 1592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918143454.702, "dur": 8.605, + "args": { + "External id": 251450,"Record function id": 0, "Ev Idx": 1593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918143456.931, "dur": 5.646, + "args": { + "External id": 251451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918143459.151, "dur": 2.401, + "args": { + "External id": 251452,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918143459.791, "dur": 1.655, + "args": { + "External id": 251453,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918143467.955, "dur": 2636.027, + "args": { + "External id": 251454,"Record function id": 0, "Ev Idx": 1597 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 4183438, "tid": 31367, + "ts": 667918143496.908, "dur": 924.148, + "args": { + "External id": 251455,"Record function id": 0, "Ev Idx": 1598 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 4183438, "tid": 31367, + "ts": 667918143522.045, "dur": 891.055, + "args": { + "External id": 251456,"Record function id": 0, "Ev Idx": 1599 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 4183438, "tid": 31367, + "ts": 667918143535.053, "dur": 862.715, + "args": { + "External id": 251457,"Record function id": 0, "Ev Idx": 1600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918143608.897, "dur": 7.988, + "args": { + "External id": 251458,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918143632.787, "dur": 73.031, + "args": { + "External id": 251459,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 1602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143636.851, "dur": 1.124, + "args": { + "External id": 251460,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143640.062, "dur": 2.606, + "args": { + "External id": 251461,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143644.809, "dur": 0.351, + "args": { + "External id": 251462,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143646.533, "dur": 0.398, + "args": { + "External id": 251463,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143648.458, "dur": 0.414, + "args": { + "External id": 251464,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143652.089, "dur": 0.537, + "args": { + "External id": 251465,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143653.777, "dur": 39.672, + "args": { + "External id": 251466,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143696.457, "dur": 2.080, + "args": { + "External id": 251467,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143700.442, "dur": 0.383, + "args": { + "External id": 251468,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918143721.612, "dur": 35.634, + "args": { + "External id": 251469,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 1612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918143796.959, "dur": 115.471, + "args": { + "External id": 251470,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 1613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918143808.228, "dur": 7.872, + "args": { + "External id": 251471,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918143821.087, "dur": 11.160, + "args": { + "External id": 251472,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 1615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918143826.594, "dur": 5.267, + "args": { + "External id": 251473,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 1616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143829.940, "dur": 0.540, + "args": { + "External id": 251474,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 1617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918143840.609, "dur": 27.727, + "args": { + "External id": 251475,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 1618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143843.074, "dur": 0.351, + "args": { + "External id": 251476,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143845.150, "dur": 1.813, + "args": { + "External id": 251477,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143848.732, "dur": 0.373, + "args": { + "External id": 251478,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143850.396, "dur": 0.281, + "args": { + "External id": 251479,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143854.174, "dur": 0.255, + "args": { + "External id": 251480,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143855.671, "dur": 0.361, + "args": { + "External id": 251481,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143857.307, "dur": 1.592, + "args": { + "External id": 251482,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143861.455, "dur": 0.407, + "args": { + "External id": 251483,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918143863.300, "dur": 0.377, + "args": { + "External id": 251484,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918143879.761, "dur": 24.483, + "args": { + "External id": 251485,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 1628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918143971.010, "dur": 323.148, + "args": { + "External id": 251486,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 1629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918144002.008, "dur": 286.709, + "args": { + "External id": 251487,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1630, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918144013.168, "dur": 269.311, + "args": { + "External id": 251488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 1631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918144317.949, "dur": 2.381, + "args": { + "External id": 251489,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1632, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918144429.287, "dur": 1654.185, + "args": { + "External id": 251490,"Sequence number": 2987572, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1633 + } + }, + { + "ph": "f", "id": 172, "pid": 4183438, "tid": 31367, "ts": 667918144429.287, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918144556.897, "dur": 150.404, + "args": { + "External id": 251491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 1634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918144756.556, "dur": 43.519, + "args": { + "External id": 251492,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 1635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918144821.042, "dur": 48.641, + "args": { + "External id": 251493,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 1636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918144879.494, "dur": 27.156, + "args": { + "External id": 251494,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 1637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918144913.229, "dur": 33.361, + "args": { + "External id": 251495,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 1638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918144954.509, "dur": 21.708, + "args": { + "External id": 251496,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 1639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918144983.210, "dur": 30.065, + "args": { + "External id": 251497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 1640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918145040.562, "dur": 26.383, + "args": { + "External id": 251498,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 1641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918145086.533, "dur": 30.394, + "args": { + "External id": 251499,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918145138.976, "dur": 22.209, + "args": { + "External id": 251500,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 1643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918145175.657, "dur": 38.878, + "args": { + "External id": 251501,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 1644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918145229.007, "dur": 40.823, + "args": { + "External id": 251502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 1645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918145273.246, "dur": 31.419, + "args": { + "External id": 251503,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 1646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918145331.869, "dur": 170.492, + "args": { + "External id": 251504,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918145410.020, "dur": 5.573, + "args": { + "External id": 251505,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918145417.442, "dur": 3.628, + "args": { + "External id": 251506,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918145535.982, "dur": 26.994, + "args": { + "External id": 251507,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918145575.002, "dur": 15.804, + "args": { + "External id": 251508,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918145599.989, "dur": 42.030, + "args": { + "External id": 251509,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 1652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918145647.676, "dur": 72.973, + "args": { + "External id": 251510,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 1653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918145731.773, "dur": 30.400, + "args": { + "External id": 251511,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 1654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918145768.427, "dur": 28.555, + "args": { + "External id": 251512,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 1655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918145802.716, "dur": 28.300, + "args": { + "External id": 251513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 1656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918145837.542, "dur": 27.870, + "args": { + "External id": 251514,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 1657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918145887.614, "dur": 23.487, + "args": { + "External id": 251515,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 1658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918145932.347, "dur": 25.391, + "args": { + "External id": 251516,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918145974.809, "dur": 16.076, + "args": { + "External id": 251517,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 1660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918146006.376, "dur": 13.723, + "args": { + "External id": 251518,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 1661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918146035.457, "dur": 18.128, + "args": { + "External id": 251519,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 1662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146126.147, "dur": 15.135, + "args": { + "External id": 251520,"Record function id": 0, "Ev Idx": 1663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146129.484, "dur": 10.855, + "args": { + "External id": 251521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918146133.663, "dur": 5.857, + "args": { + "External id": 251522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918146135.388, "dur": 4.038, + "args": { + "External id": 251523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146145.201, "dur": 4.427, + "args": { + "External id": 251524,"Record function id": 0, "Ev Idx": 1667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146146.732, "dur": 2.474, + "args": { + "External id": 251525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918146147.346, "dur": 1.427, + "args": { + "External id": 251526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918146147.785, "dur": 0.880, + "args": { + "External id": 251527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146152.993, "dur": 4.130, + "args": { + "External id": 251528,"Record function id": 0, "Ev Idx": 1671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146154.158, "dur": 2.540, + "args": { + "External id": 251529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918146154.973, "dur": 1.279, + "args": { + "External id": 251530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918146155.465, "dur": 0.673, + "args": { + "External id": 251531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146160.479, "dur": 5.098, + "args": { + "External id": 251532,"Record function id": 0, "Ev Idx": 1675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146161.825, "dur": 3.350, + "args": { + "External id": 251533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 1676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918146162.349, "dur": 2.419, + "args": { + "External id": 251534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 1677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918146162.669, "dur": 2.021, + "args": { + "External id": 251535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 1678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146168.644, "dur": 4.052, + "args": { + "External id": 251536,"Record function id": 0, "Ev Idx": 1679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146169.963, "dur": 2.287, + "args": { + "External id": 251537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918146170.491, "dur": 1.266, + "args": { + "External id": 251538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918146170.996, "dur": 0.683, + "args": { + "External id": 251539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146175.746, "dur": 5.039, + "args": { + "External id": 251540,"Record function id": 0, "Ev Idx": 1683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146176.980, "dur": 3.417, + "args": { + "External id": 251541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918146177.464, "dur": 2.531, + "args": { + "External id": 251542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918146179.251, "dur": 0.599, + "args": { + "External id": 251543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146183.895, "dur": 3.538, + "args": { + "External id": 251544,"Record function id": 0, "Ev Idx": 1687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146185.090, "dur": 1.928, + "args": { + "External id": 251545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918146185.656, "dur": 0.987, + "args": { + "External id": 251546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918146185.951, "dur": 0.601, + "args": { + "External id": 251547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146190.464, "dur": 23.225, + "args": { + "External id": 251548,"Record function id": 0, "Ev Idx": 1691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146207.939, "dur": 4.676, + "args": { + "External id": 251549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918146209.581, "dur": 2.226, + "args": { + "External id": 251550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918146210.243, "dur": 1.315, + "args": { + "External id": 251551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146220.064, "dur": 4.459, + "args": { + "External id": 251552,"Record function id": 0, "Ev Idx": 1695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918146221.450, "dur": 2.672, + "args": { + "External id": 251553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918146222.173, "dur": 1.395, + "args": { + "External id": 251554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918146222.590, "dur": 0.876, + "args": { + "External id": 251555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918146228.315, "dur": 59115.437, + "args": { + "External id": 251556,"Record function id": 0, "Sequence number": 2987571, "Fwd thread id": 1, "Ev Idx": 1699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918146229.720, "dur": 59105.321, + "args": { + "External id": 251557,"Sequence number": 2987571, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1700 + } + }, + { + "ph": "f", "id": 173, "pid": 4183438, "tid": 31367, "ts": 667918146229.720, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 4183438, "tid": 31367, + "ts": 667918146261.994, "dur": 41.989, + "args": { + "External id": 251558,"Record function id": 0, "Ev Idx": 1701 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 4183438, "tid": 31367, + "ts": 667918146311.112, "dur": 71.886, + "args": { + "External id": 251559,"Record function id": 0, "Ev Idx": 1702 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 4183438, "tid": 31367, + "ts": 667918146389.950, "dur": 58936.437, + "args": { + "External id": 251560,"Record function id": 0, "Ev Idx": 1703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918146448.715, "dur": 11.057, + "args": { + "External id": 251561,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918146469.689, "dur": 4.676, + "args": { + "External id": 251562,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 1705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918146490.212, "dur": 58034.513, + "args": { + "External id": 251563,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 1706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918146507.891, "dur": 58009.025, + "args": { + "External id": 251564,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 1707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918146546.032, "dur": 5.285, + "args": { + "External id": 251565,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918146557.246, "dur": 57922.482, + "args": { + "External id": 251566,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 1709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918146559.722, "dur": 57919.137, + "args": { + "External id": 251567,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 1710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918146563.758, "dur": 7.014, + "args": { + "External id": 251568,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918146572.459, "dur": 57902.636, + "args": { + "External id": 251569,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 1712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918204618.717, "dur": 8.677, + "args": { + "External id": 251570,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 1713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918204621.812, "dur": 5.240, + "args": { + "External id": 251571,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918204690.754, "dur": 297.667, + "args": { + "External id": 251572,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 1715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918204721.539, "dur": 262.518, + "args": { + "External id": 251573,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1716, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918204733.435, "dur": 245.513, + "args": { + "External id": 251574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 1717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918205009.998, "dur": 2.031, + "args": { + "External id": 251575,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1718, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205067.742, "dur": 6.711, + "args": { + "External id": 251576,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205132.284, "dur": 1.381, + "args": { + "External id": 251577,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205152.080, "dur": 1.466, + "args": { + "External id": 251578,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205168.756, "dur": 0.886, + "args": { + "External id": 251579,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205182.582, "dur": 0.927, + "args": { + "External id": 251580,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205212.153, "dur": 1.524, + "args": { + "External id": 251581,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205228.712, "dur": 1.149, + "args": { + "External id": 251582,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205244.023, "dur": 2.086, + "args": { + "External id": 251583,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205258.930, "dur": 0.824, + "args": { + "External id": 251584,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918205359.146, "dur": 2744.282, + "args": { + "External id": 251585,"Record function id": 0, "Ev Idx": 1728 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 4183438, "tid": 31367, + "ts": 667918205379.041, "dur": 1054.828, + "args": { + "External id": 251586,"Record function id": 0, "Ev Idx": 1729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 4183438, "tid": 31367, + "ts": 667918205395.105, "dur": 363.770, + "args": { + "External id": 251587,"Record function id": 0, "Ev Idx": 1730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918205470.833, "dur": 4.161, + "args": { + "External id": 251588,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 1731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918205477.883, "dur": 0.917, + "args": { + "External id": 251589,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 1732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918205480.497, "dur": 0.831, + "args": { + "External id": 251590,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 1733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918205482.917, "dur": 2.619, + "args": { + "External id": 251591,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 1734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918205487.064, "dur": 0.866, + "args": { + "External id": 251592,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 1735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918205490.906, "dur": 0.895, + "args": { + "External id": 251593,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 1736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918205493.690, "dur": 2.251, + "args": { + "External id": 251594,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 1737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918205497.535, "dur": 0.886, + "args": { + "External id": 251595,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 1738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918205499.799, "dur": 0.970, + "args": { + "External id": 251596,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 1739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918205503.754, "dur": 0.836, + "args": { + "External id": 251597,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 1740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918205521.755, "dur": 197.046, + "args": { + "External id": 251598,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 1741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918205537.407, "dur": 175.094, + "args": { + "External id": 251599,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 1742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918205555.229, "dur": 12.088, + "args": { + "External id": 251600,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918205570.023, "dur": 63.663, + "args": { + "External id": 251601,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 1744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918205572.673, "dur": 60.720, + "args": { + "External id": 251602,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 1745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205578.478, "dur": 7.056, + "args": { + "External id": 251603,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918205587.053, "dur": 45.805, + "args": { + "External id": 251604,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 1747 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 4183438, "tid": 31367, + "ts": 667918205844.268, "dur": 581.247, + "args": { + "External id": 251605,"Record function id": 0, "Ev Idx": 1748 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 4183438, "tid": 31367, + "ts": 667918205862.255, "dur": 549.845, + "args": { + "External id": 251606,"Record function id": 0, "Ev Idx": 1749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918205918.742, "dur": 5.887, + "args": { + "External id": 251607,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918205939.872, "dur": 35.029, + "args": { + "External id": 251608,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 1751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205944.951, "dur": 1.543, + "args": { + "External id": 251609,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205948.538, "dur": 0.356, + "args": { + "External id": 251610,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205951.671, "dur": 0.451, + "args": { + "External id": 251611,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205954.221, "dur": 0.428, + "args": { + "External id": 251612,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205955.860, "dur": 0.589, + "args": { + "External id": 251613,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205959.237, "dur": 0.573, + "args": { + "External id": 251614,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205961.323, "dur": 2.663, + "args": { + "External id": 251615,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205965.490, "dur": 1.575, + "args": { + "External id": 251616,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918205968.503, "dur": 0.545, + "args": { + "External id": 251617,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918205987.945, "dur": 33.724, + "args": { + "External id": 251618,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 1761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918206053.110, "dur": 104.613, + "args": { + "External id": 251619,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 1762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918206062.870, "dur": 4.958, + "args": { + "External id": 251620,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918206073.345, "dur": 11.078, + "args": { + "External id": 251621,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 1764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918206078.939, "dur": 5.089, + "args": { + "External id": 251622,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 1765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918206082.416, "dur": 0.395, + "args": { + "External id": 251623,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 1766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918206091.245, "dur": 29.515, + "args": { + "External id": 251624,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 1767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918206093.548, "dur": 0.496, + "args": { + "External id": 251625,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918206095.816, "dur": 1.918, + "args": { + "External id": 251626,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918206099.097, "dur": 0.418, + "args": { + "External id": 251627,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918206101.302, "dur": 1.804, + "args": { + "External id": 251628,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918206106.665, "dur": 0.290, + "args": { + "External id": 251629,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918206108.594, "dur": 0.428, + "args": { + "External id": 251630,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918206110.303, "dur": 0.449, + "args": { + "External id": 251631,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918206113.749, "dur": 0.520, + "args": { + "External id": 251632,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918206115.549, "dur": 0.377, + "args": { + "External id": 251633,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918206131.200, "dur": 19.524, + "args": { + "External id": 251634,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 1777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918206216.416, "dur": 126.255, + "args": { + "External id": 251635,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 1778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918206244.999, "dur": 94.066, + "args": { + "External id": 251636,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1779, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918206256.435, "dur": 78.457, + "args": { + "External id": 251637,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 1780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918206358.527, "dur": 2.030, + "args": { + "External id": 251638,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1781, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918206441.113, "dur": 1642.789, + "args": { + "External id": 251639,"Sequence number": 2987570, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1782 + } + }, + { + "ph": "f", "id": 174, "pid": 4183438, "tid": 31367, "ts": 667918206441.113, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918206550.722, "dur": 98.391, + "args": { + "External id": 251640,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 1783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918206734.855, "dur": 39.502, + "args": { + "External id": 251641,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 1784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918206792.661, "dur": 49.211, + "args": { + "External id": 251642,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 1785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918206851.762, "dur": 28.052, + "args": { + "External id": 251643,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 1786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918206887.528, "dur": 34.057, + "args": { + "External id": 251644,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 1787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918206931.532, "dur": 21.570, + "args": { + "External id": 251645,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 1788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918206960.306, "dur": 31.879, + "args": { + "External id": 251646,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 1789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918207016.104, "dur": 22.737, + "args": { + "External id": 251647,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 1790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918207059.930, "dur": 26.820, + "args": { + "External id": 251648,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918207108.120, "dur": 19.503, + "args": { + "External id": 251649,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 1792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918207143.763, "dur": 14.526, + "args": { + "External id": 251650,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 1793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918207165.270, "dur": 51.085, + "args": { + "External id": 251651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 1794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918207221.417, "dur": 33.689, + "args": { + "External id": 251652,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 1795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918207284.957, "dur": 171.262, + "args": { + "External id": 251653,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918207361.552, "dur": 6.479, + "args": { + "External id": 251654,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918207373.165, "dur": 2.466, + "args": { + "External id": 251655,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918207499.397, "dur": 25.585, + "args": { + "External id": 251656,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918207538.533, "dur": 17.519, + "args": { + "External id": 251657,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918207563.735, "dur": 44.647, + "args": { + "External id": 251658,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 1801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918207614.306, "dur": 31.325, + "args": { + "External id": 251659,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 1802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918207692.451, "dur": 35.318, + "args": { + "External id": 251660,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 1803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918207733.290, "dur": 29.370, + "args": { + "External id": 251661,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 1804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918207786.997, "dur": 38.464, + "args": { + "External id": 251662,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 1805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918207833.558, "dur": 28.575, + "args": { + "External id": 251663,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 1806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918207885.695, "dur": 22.951, + "args": { + "External id": 251664,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 1807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918207931.567, "dur": 25.589, + "args": { + "External id": 251665,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918207971.990, "dur": 19.165, + "args": { + "External id": 251666,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 1809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918208007.840, "dur": 13.386, + "args": { + "External id": 251667,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 1810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918208036.181, "dur": 17.221, + "args": { + "External id": 251668,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 1811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208125.861, "dur": 15.640, + "args": { + "External id": 251669,"Record function id": 0, "Ev Idx": 1812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208129.586, "dur": 10.958, + "args": { + "External id": 251670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918208133.965, "dur": 5.621, + "args": { + "External id": 251671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918208135.376, "dur": 4.101, + "args": { + "External id": 251672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208145.554, "dur": 4.596, + "args": { + "External id": 251673,"Record function id": 0, "Ev Idx": 1816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208147.024, "dur": 2.720, + "args": { + "External id": 251674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918208147.896, "dur": 1.421, + "args": { + "External id": 251675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918208148.432, "dur": 0.746, + "args": { + "External id": 251676,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208153.503, "dur": 6.714, + "args": { + "External id": 251677,"Record function id": 0, "Ev Idx": 1820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208155.151, "dur": 4.665, + "args": { + "External id": 251678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918208155.717, "dur": 3.705, + "args": { + "External id": 251679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918208156.394, "dur": 2.921, + "args": { + "External id": 251680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208163.719, "dur": 4.456, + "args": { + "External id": 251681,"Record function id": 0, "Ev Idx": 1824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208165.403, "dur": 2.333, + "args": { + "External id": 251682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 1825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918208165.870, "dur": 1.479, + "args": { + "External id": 251683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 1826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918208166.176, "dur": 1.087, + "args": { + "External id": 251684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 1827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208171.478, "dur": 4.651, + "args": { + "External id": 251685,"Record function id": 0, "Ev Idx": 1828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208173.188, "dur": 2.543, + "args": { + "External id": 251686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918208173.891, "dur": 1.306, + "args": { + "External id": 251687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918208174.365, "dur": 0.731, + "args": { + "External id": 251688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208179.502, "dur": 3.865, + "args": { + "External id": 251689,"Record function id": 0, "Ev Idx": 1832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208180.847, "dur": 2.098, + "args": { + "External id": 251690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918208181.480, "dur": 1.066, + "args": { + "External id": 251691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918208181.822, "dur": 0.624, + "args": { + "External id": 251692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208186.651, "dur": 23.175, + "args": { + "External id": 251693,"Record function id": 0, "Ev Idx": 1836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208188.087, "dur": 20.307, + "args": { + "External id": 251694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918208188.628, "dur": 2.810, + "args": { + "External id": 251695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918208190.668, "dur": 0.662, + "args": { + "External id": 251696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208215.338, "dur": 5.942, + "args": { + "External id": 251697,"Record function id": 0, "Ev Idx": 1840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208217.284, "dur": 3.581, + "args": { + "External id": 251698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918208218.251, "dur": 2.002, + "args": { + "External id": 251699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918208218.799, "dur": 1.384, + "args": { + "External id": 251700,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208224.365, "dur": 3.926, + "args": { + "External id": 251701,"Record function id": 0, "Ev Idx": 1844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918208225.862, "dur": 2.019, + "args": { + "External id": 251702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918208226.387, "dur": 1.074, + "args": { + "External id": 251703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918208226.677, "dur": 0.684, + "args": { + "External id": 251704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918208231.783, "dur": 15289.192, + "args": { + "External id": 251705,"Record function id": 0, "Sequence number": 2987569, "Fwd thread id": 1, "Ev Idx": 1848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918208232.946, "dur": 15279.914, + "args": { + "External id": 251706,"Sequence number": 2987569, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1849 + } + }, + { + "ph": "f", "id": 175, "pid": 4183438, "tid": 31367, "ts": 667918208232.946, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 4183438, "tid": 31367, + "ts": 667918208262.193, "dur": 43.285, + "args": { + "External id": 251707,"Record function id": 0, "Ev Idx": 1850 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 4183438, "tid": 31367, + "ts": 667918208313.089, "dur": 67.754, + "args": { + "External id": 251708,"Record function id": 0, "Ev Idx": 1851 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 4183438, "tid": 31367, + "ts": 667918208387.509, "dur": 15117.077, + "args": { + "External id": 251709,"Record function id": 0, "Ev Idx": 1852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918208486.053, "dur": 6.661, + "args": { + "External id": 251710,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918208501.685, "dur": 6.285, + "args": { + "External id": 251711,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 1854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918208522.513, "dur": 14231.716, + "args": { + "External id": 251712,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 1855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918208535.316, "dur": 14210.325, + "args": { + "External id": 251713,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 1856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918208563.786, "dur": 13.611, + "args": { + "External id": 251714,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918208582.645, "dur": 14126.231, + "args": { + "External id": 251715,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 1858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918208587.174, "dur": 14120.955, + "args": { + "External id": 251716,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 1859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918208592.882, "dur": 4.648, + "args": { + "External id": 251717,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918208599.265, "dur": 14105.482, + "args": { + "External id": 251718,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 1861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918222842.180, "dur": 9.353, + "args": { + "External id": 251719,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 1862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918222845.292, "dur": 5.895, + "args": { + "External id": 251720,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918222879.531, "dur": 311.706, + "args": { + "External id": 251721,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 1864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918222909.383, "dur": 277.401, + "args": { + "External id": 251722,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1865, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918222922.408, "dur": 259.356, + "args": { + "External id": 251723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 1866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918223229.532, "dur": 2.757, + "args": { + "External id": 251724,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1867, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918223290.464, "dur": 6.403, + "args": { + "External id": 251725,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918223344.814, "dur": 1.175, + "args": { + "External id": 251726,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918223364.326, "dur": 2.678, + "args": { + "External id": 251727,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918223379.325, "dur": 0.882, + "args": { + "External id": 251728,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918223393.302, "dur": 0.789, + "args": { + "External id": 251729,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918223406.094, "dur": 0.973, + "args": { + "External id": 251730,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918223418.763, "dur": 2.658, + "args": { + "External id": 251731,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918223433.398, "dur": 2.556, + "args": { + "External id": 251732,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918223444.795, "dur": 0.891, + "args": { + "External id": 251733,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918223536.945, "dur": 2742.486, + "args": { + "External id": 251734,"Record function id": 0, "Ev Idx": 1877 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 4183438, "tid": 31367, + "ts": 667918223555.605, "dur": 1032.791, + "args": { + "External id": 251735,"Record function id": 0, "Ev Idx": 1878 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 4183438, "tid": 31367, + "ts": 667918223570.174, "dur": 344.618, + "args": { + "External id": 251736,"Record function id": 0, "Ev Idx": 1879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918223646.141, "dur": 4.074, + "args": { + "External id": 251737,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 1880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918223689.950, "dur": 1.646, + "args": { + "External id": 251738,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 1881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918223693.735, "dur": 2.984, + "args": { + "External id": 251739,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 1882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918223700.384, "dur": 0.998, + "args": { + "External id": 251740,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 1883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918223702.611, "dur": 0.901, + "args": { + "External id": 251741,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 1884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918223704.935, "dur": 0.966, + "args": { + "External id": 251742,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 1885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918223707.633, "dur": 1.715, + "args": { + "External id": 251743,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 1886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918223712.463, "dur": 1.152, + "args": { + "External id": 251744,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 1887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918223715.173, "dur": 1.023, + "args": { + "External id": 251745,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 1888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918223717.540, "dur": 0.956, + "args": { + "External id": 251746,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 1889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918223736.968, "dur": 147.817, + "args": { + "External id": 251747,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 1890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918223752.231, "dur": 128.158, + "args": { + "External id": 251748,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 1891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918223772.024, "dur": 15.510, + "args": { + "External id": 251749,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918223790.311, "dur": 63.389, + "args": { + "External id": 251750,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 1893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918223792.668, "dur": 60.667, + "args": { + "External id": 251751,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 1894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918223796.476, "dur": 6.484, + "args": { + "External id": 251752,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918223804.747, "dur": 47.762, + "args": { + "External id": 251753,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 1896 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 4183438, "tid": 31367, + "ts": 667918223997.023, "dur": 583.506, + "args": { + "External id": 251754,"Record function id": 0, "Ev Idx": 1897 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 4183438, "tid": 31367, + "ts": 667918224015.052, "dur": 552.460, + "args": { + "External id": 251755,"Record function id": 0, "Ev Idx": 1898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918224073.081, "dur": 4.788, + "args": { + "External id": 251756,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918224093.135, "dur": 32.420, + "args": { + "External id": 251757,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 1900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224098.060, "dur": 1.618, + "args": { + "External id": 251758,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224103.109, "dur": 0.387, + "args": { + "External id": 251759,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224104.631, "dur": 0.416, + "args": { + "External id": 251760,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224106.466, "dur": 1.494, + "args": { + "External id": 251761,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224109.108, "dur": 0.575, + "args": { + "External id": 251762,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224110.526, "dur": 2.198, + "args": { + "External id": 251763,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224116.417, "dur": 0.472, + "args": { + "External id": 251764,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224118.162, "dur": 0.320, + "args": { + "External id": 251765,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224119.785, "dur": 0.446, + "args": { + "External id": 251766,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918224136.584, "dur": 31.750, + "args": { + "External id": 251767,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 1910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918224217.632, "dur": 113.241, + "args": { + "External id": 251768,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 1911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918224228.616, "dur": 4.869, + "args": { + "External id": 251769,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918224239.728, "dur": 12.090, + "args": { + "External id": 251770,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 1913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918224244.214, "dur": 7.156, + "args": { + "External id": 251771,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 1914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224247.712, "dur": 2.045, + "args": { + "External id": 251772,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 1915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918224258.441, "dur": 32.201, + "args": { + "External id": 251773,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 1916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224260.410, "dur": 0.396, + "args": { + "External id": 251774,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224261.907, "dur": 0.501, + "args": { + "External id": 251775,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224272.223, "dur": 2.221, + "args": { + "External id": 251776,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224276.733, "dur": 0.379, + "args": { + "External id": 251777,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224278.219, "dur": 0.415, + "args": { + "External id": 251778,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224279.641, "dur": 1.722, + "args": { + "External id": 251779,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224282.323, "dur": 0.549, + "args": { + "External id": 251780,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224283.762, "dur": 0.271, + "args": { + "External id": 251781,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918224286.093, "dur": 0.400, + "args": { + "External id": 251782,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 1925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918224301.457, "dur": 21.279, + "args": { + "External id": 251783,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 1926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918224377.003, "dur": 119.601, + "args": { + "External id": 251784,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 1927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918224405.125, "dur": 87.925, + "args": { + "External id": 251785,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1928, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918224414.566, "dur": 74.610, + "args": { + "External id": 251786,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 1929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918224513.870, "dur": 1.990, + "args": { + "External id": 251787,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1930, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918224595.114, "dur": 1664.590, + "args": { + "External id": 251788,"Sequence number": 2987568, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1931 + } + }, + { + "ph": "f", "id": 176, "pid": 4183438, "tid": 31367, "ts": 667918224595.114, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918224755.790, "dur": 105.944, + "args": { + "External id": 251789,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 1932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918224905.745, "dur": 37.154, + "args": { + "External id": 251790,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 1933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918224960.114, "dur": 42.316, + "args": { + "External id": 251791,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 1934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918225013.736, "dur": 26.850, + "args": { + "External id": 251792,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 1935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918225046.421, "dur": 32.551, + "args": { + "External id": 251793,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 1936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918225085.219, "dur": 21.224, + "args": { + "External id": 251794,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 1937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918225117.682, "dur": 30.066, + "args": { + "External id": 251795,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 1938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918225174.206, "dur": 41.989, + "args": { + "External id": 251796,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 1939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918225236.673, "dur": 29.886, + "args": { + "External id": 251797,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918225288.034, "dur": 19.795, + "args": { + "External id": 251798,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 1941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918225320.812, "dur": 15.513, + "args": { + "External id": 251799,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 1942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918225345.343, "dur": 44.317, + "args": { + "External id": 251800,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 1943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918225393.080, "dur": 31.111, + "args": { + "External id": 251801,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 1944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918225457.990, "dur": 168.230, + "args": { + "External id": 251802,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918225534.173, "dur": 5.881, + "args": { + "External id": 251803,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918225541.775, "dur": 2.513, + "args": { + "External id": 251804,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918225694.997, "dur": 28.777, + "args": { + "External id": 251805,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918225739.135, "dur": 16.306, + "args": { + "External id": 251806,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918225765.774, "dur": 50.416, + "args": { + "External id": 251807,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 1950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918225821.997, "dur": 32.732, + "args": { + "External id": 251808,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 1951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918225864.221, "dur": 28.154, + "args": { + "External id": 251809,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 1952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918225897.035, "dur": 46.302, + "args": { + "External id": 251810,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 1953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918225955.451, "dur": 35.496, + "args": { + "External id": 251811,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 1954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918225998.349, "dur": 27.425, + "args": { + "External id": 251812,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 1955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918226050.429, "dur": 22.422, + "args": { + "External id": 251813,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 1956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918226089.174, "dur": 27.350, + "args": { + "External id": 251814,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918226131.477, "dur": 18.974, + "args": { + "External id": 251815,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 1958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918226163.242, "dur": 15.304, + "args": { + "External id": 251816,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 1959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918226208.731, "dur": 20.509, + "args": { + "External id": 251817,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 1960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226304.985, "dur": 15.167, + "args": { + "External id": 251818,"Record function id": 0, "Ev Idx": 1961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226308.264, "dur": 10.950, + "args": { + "External id": 251819,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918226312.454, "dur": 5.795, + "args": { + "External id": 251820,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918226313.921, "dur": 4.236, + "args": { + "External id": 251821,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226324.013, "dur": 5.583, + "args": { + "External id": 251822,"Record function id": 0, "Ev Idx": 1965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226325.332, "dur": 3.816, + "args": { + "External id": 251823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918226326.089, "dur": 2.597, + "args": { + "External id": 251824,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918226326.432, "dur": 2.164, + "args": { + "External id": 251825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226332.989, "dur": 4.554, + "args": { + "External id": 251826,"Record function id": 0, "Ev Idx": 1969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226334.288, "dur": 2.788, + "args": { + "External id": 251827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918226334.905, "dur": 1.729, + "args": { + "External id": 251828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918226335.543, "dur": 0.993, + "args": { + "External id": 251829,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 1972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226343.982, "dur": 3.814, + "args": { + "External id": 251830,"Record function id": 0, "Ev Idx": 1973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226345.218, "dur": 2.169, + "args": { + "External id": 251831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 1974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918226345.968, "dur": 0.991, + "args": { + "External id": 251832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 1975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918226346.252, "dur": 0.604, + "args": { + "External id": 251833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 1976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226350.818, "dur": 3.793, + "args": { + "External id": 251834,"Record function id": 0, "Ev Idx": 1977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226351.973, "dur": 2.245, + "args": { + "External id": 251835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918226352.457, "dur": 1.318, + "args": { + "External id": 251836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918226353.085, "dur": 0.618, + "args": { + "External id": 251837,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226357.958, "dur": 3.700, + "args": { + "External id": 251838,"Record function id": 0, "Ev Idx": 1981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226359.200, "dur": 2.061, + "args": { + "External id": 251839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918226359.667, "dur": 1.198, + "args": { + "External id": 251840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918226360.103, "dur": 0.689, + "args": { + "External id": 251841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226364.717, "dur": 3.566, + "args": { + "External id": 251842,"Record function id": 0, "Ev Idx": 1985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226365.964, "dur": 1.916, + "args": { + "External id": 251843,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918226366.494, "dur": 1.009, + "args": { + "External id": 251844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918226366.856, "dur": 0.574, + "args": { + "External id": 251845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226371.341, "dur": 5.699, + "args": { + "External id": 251846,"Record function id": 0, "Ev Idx": 1989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226372.814, "dur": 3.828, + "args": { + "External id": 251847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918226373.433, "dur": 2.799, + "args": { + "External id": 251848,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918226375.417, "dur": 0.742, + "args": { + "External id": 251849,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 1992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226380.133, "dur": 5.021, + "args": { + "External id": 251850,"Record function id": 0, "Ev Idx": 1993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918226381.156, "dur": 3.582, + "args": { + "External id": 251851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918226381.674, "dur": 2.682, + "args": { + "External id": 251852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918226381.988, "dur": 2.298, + "args": { + "External id": 251853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 1996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918226390.350, "dur": 14986.961, + "args": { + "External id": 251854,"Record function id": 0, "Sequence number": 2987567, "Fwd thread id": 1, "Ev Idx": 1997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918226391.604, "dur": 14977.003, + "args": { + "External id": 251855,"Sequence number": 2987567, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 1998 + } + }, + { + "ph": "f", "id": 177, "pid": 4183438, "tid": 31367, "ts": 667918226391.604, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 4183438, "tid": 31367, + "ts": 667918226422.078, "dur": 37.429, + "args": { + "External id": 251856,"Record function id": 0, "Ev Idx": 1999 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 4183438, "tid": 31367, + "ts": 667918226466.488, "dur": 63.587, + "args": { + "External id": 251857,"Record function id": 0, "Ev Idx": 2000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 4183438, "tid": 31367, + "ts": 667918226537.313, "dur": 14824.162, + "args": { + "External id": 251858,"Record function id": 0, "Ev Idx": 2001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918226630.809, "dur": 6.549, + "args": { + "External id": 251859,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918226646.567, "dur": 45.592, + "args": { + "External id": 251860,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 2003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918226709.037, "dur": 13939.228, + "args": { + "External id": 251861,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 2004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918226723.340, "dur": 13916.469, + "args": { + "External id": 251862,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 2005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918226748.109, "dur": 14.516, + "args": { + "External id": 251863,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918226767.901, "dur": 13835.209, + "args": { + "External id": 251864,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 2007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918226770.286, "dur": 13832.195, + "args": { + "External id": 251865,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 2008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918226774.278, "dur": 6.039, + "args": { + "External id": 251866,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918226783.701, "dur": 13814.879, + "args": { + "External id": 251867,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 2010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918240760.985, "dur": 10.028, + "args": { + "External id": 251868,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 2011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918240763.966, "dur": 6.477, + "args": { + "External id": 251869,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918240799.527, "dur": 262.630, + "args": { + "External id": 251870,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 2013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918240828.665, "dur": 229.147, + "args": { + "External id": 251871,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2014, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918240839.773, "dur": 213.134, + "args": { + "External id": 251872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 2015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918241080.684, "dur": 2.230, + "args": { + "External id": 251873,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2016, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241132.349, "dur": 6.363, + "args": { + "External id": 251874,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241184.334, "dur": 3.558, + "args": { + "External id": 251875,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241223.085, "dur": 1.656, + "args": { + "External id": 251876,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241238.000, "dur": 0.919, + "args": { + "External id": 251877,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241251.252, "dur": 0.735, + "args": { + "External id": 251878,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241262.723, "dur": 2.644, + "args": { + "External id": 251879,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241277.687, "dur": 1.124, + "args": { + "External id": 251880,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241289.795, "dur": 1.863, + "args": { + "External id": 251881,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241301.980, "dur": 0.800, + "args": { + "External id": 251882,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918241393.034, "dur": 2650.989, + "args": { + "External id": 251883,"Record function id": 0, "Ev Idx": 2026 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 4183438, "tid": 31367, + "ts": 667918241411.231, "dur": 999.566, + "args": { + "External id": 251884,"Record function id": 0, "Ev Idx": 2027 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 4183438, "tid": 31367, + "ts": 667918241424.517, "dur": 345.549, + "args": { + "External id": 251885,"Record function id": 0, "Ev Idx": 2028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918241500.481, "dur": 6.193, + "args": { + "External id": 251886,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 2029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918241509.884, "dur": 1.425, + "args": { + "External id": 251887,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 2030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918241512.932, "dur": 0.768, + "args": { + "External id": 251888,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918241516.774, "dur": 0.818, + "args": { + "External id": 251889,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918241519.056, "dur": 0.811, + "args": { + "External id": 251890,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918241520.986, "dur": 1.053, + "args": { + "External id": 251891,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918241523.620, "dur": 1.667, + "args": { + "External id": 251892,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 2035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918241528.362, "dur": 0.760, + "args": { + "External id": 251893,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918241530.429, "dur": 2.496, + "args": { + "External id": 251894,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918241534.105, "dur": 0.820, + "args": { + "External id": 251895,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918241551.806, "dur": 186.548, + "args": { + "External id": 251896,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 2039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918241566.024, "dur": 167.390, + "args": { + "External id": 251897,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 2040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918241582.116, "dur": 13.479, + "args": { + "External id": 251898,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918241600.328, "dur": 103.103, + "args": { + "External id": 251899,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 2042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918241602.684, "dur": 100.430, + "args": { + "External id": 251900,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 2043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241607.334, "dur": 6.808, + "args": { + "External id": 251901,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918241615.988, "dur": 85.777, + "args": { + "External id": 251902,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 2045 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 4183438, "tid": 31367, + "ts": 667918241849.451, "dur": 553.787, + "args": { + "External id": 251903,"Record function id": 0, "Ev Idx": 2046 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 4183438, "tid": 31367, + "ts": 667918241866.535, "dur": 524.160, + "args": { + "External id": 251904,"Record function id": 0, "Ev Idx": 2047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918241939.295, "dur": 5.530, + "args": { + "External id": 251905,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918241960.017, "dur": 23.885, + "args": { + "External id": 251906,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 2049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241964.366, "dur": 1.385, + "args": { + "External id": 251907,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241968.787, "dur": 0.398, + "args": { + "External id": 251908,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241969.976, "dur": 0.313, + "args": { + "External id": 251909,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241971.203, "dur": 3.141, + "args": { + "External id": 251910,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241975.063, "dur": 0.403, + "args": { + "External id": 251911,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241976.085, "dur": 0.297, + "args": { + "External id": 251912,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241978.068, "dur": 0.282, + "args": { + "External id": 251913,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241978.862, "dur": 0.298, + "args": { + "External id": 251914,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918241979.929, "dur": 0.256, + "args": { + "External id": 251915,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918241994.064, "dur": 29.516, + "args": { + "External id": 251916,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 2059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918242052.891, "dur": 88.618, + "args": { + "External id": 251917,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 2060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918242062.656, "dur": 3.074, + "args": { + "External id": 251918,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918242071.592, "dur": 9.587, + "args": { + "External id": 251919,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 2062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918242075.396, "dur": 5.375, + "args": { + "External id": 251920,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 2063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918242078.353, "dur": 1.241, + "args": { + "External id": 251921,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 2064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918242087.439, "dur": 20.743, + "args": { + "External id": 251922,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 2065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918242088.606, "dur": 2.220, + "args": { + "External id": 251923,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918242091.728, "dur": 0.715, + "args": { + "External id": 251924,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918242093.193, "dur": 0.468, + "args": { + "External id": 251925,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918242095.596, "dur": 0.337, + "args": { + "External id": 251926,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918242096.790, "dur": 0.385, + "args": { + "External id": 251927,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918242097.840, "dur": 0.926, + "args": { + "External id": 251928,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918242099.408, "dur": 0.487, + "args": { + "External id": 251929,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918242100.435, "dur": 0.339, + "args": { + "External id": 251930,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918242102.818, "dur": 1.896, + "args": { + "External id": 251931,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918242116.899, "dur": 16.993, + "args": { + "External id": 251932,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 2075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918242181.140, "dur": 140.592, + "args": { + "External id": 251933,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 2076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918242224.577, "dur": 93.264, + "args": { + "External id": 251934,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2077, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918242235.468, "dur": 77.751, + "args": { + "External id": 251935,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 2078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918242337.917, "dur": 1.862, + "args": { + "External id": 251936,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2079, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918242417.030, "dur": 1607.470, + "args": { + "External id": 251937,"Sequence number": 2987566, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 2080 + } + }, + { + "ph": "f", "id": 178, "pid": 4183438, "tid": 31367, "ts": 667918242417.030, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918242528.203, "dur": 100.927, + "args": { + "External id": 251938,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 2081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918242711.772, "dur": 40.472, + "args": { + "External id": 251939,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 2082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918242773.138, "dur": 52.955, + "args": { + "External id": 251940,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 2083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918242835.831, "dur": 27.502, + "args": { + "External id": 251941,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 2084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918242872.434, "dur": 33.481, + "args": { + "External id": 251942,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 2085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918242911.766, "dur": 26.023, + "args": { + "External id": 251943,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 2086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918242945.568, "dur": 28.873, + "args": { + "External id": 251944,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 2087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918242998.668, "dur": 21.109, + "args": { + "External id": 251945,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 2088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918243037.384, "dur": 26.753, + "args": { + "External id": 251946,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918243084.062, "dur": 17.792, + "args": { + "External id": 251947,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 2090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918243113.480, "dur": 16.557, + "args": { + "External id": 251948,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 2091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918243138.815, "dur": 35.787, + "args": { + "External id": 251949,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918243177.787, "dur": 46.963, + "args": { + "External id": 251950,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918243258.737, "dur": 173.414, + "args": { + "External id": 251951,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918243339.838, "dur": 6.518, + "args": { + "External id": 251952,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918243348.071, "dur": 2.236, + "args": { + "External id": 251953,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918243468.746, "dur": 23.346, + "args": { + "External id": 251954,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918243505.619, "dur": 14.770, + "args": { + "External id": 251955,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918243527.952, "dur": 47.088, + "args": { + "External id": 251956,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918243583.256, "dur": 31.598, + "args": { + "External id": 251957,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918243621.333, "dur": 28.109, + "args": { + "External id": 251958,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918243689.114, "dur": 34.866, + "args": { + "External id": 251959,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918243733.854, "dur": 27.705, + "args": { + "External id": 251960,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918243771.302, "dur": 27.939, + "args": { + "External id": 251961,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918243832.269, "dur": 28.306, + "args": { + "External id": 251962,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 2105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918243883.438, "dur": 22.331, + "args": { + "External id": 251963,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918243920.866, "dur": 17.395, + "args": { + "External id": 251964,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 2107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918243953.841, "dur": 13.501, + "args": { + "External id": 251965,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 2108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918243978.251, "dur": 17.709, + "args": { + "External id": 251966,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 2109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244069.511, "dur": 16.181, + "args": { + "External id": 251967,"Record function id": 0, "Ev Idx": 2110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244072.398, "dur": 12.411, + "args": { + "External id": 251968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918244076.823, "dur": 7.181, + "args": { + "External id": 251969,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918244078.360, "dur": 5.549, + "args": { + "External id": 251970,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244089.498, "dur": 4.144, + "args": { + "External id": 251971,"Record function id": 0, "Ev Idx": 2114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244090.658, "dur": 2.562, + "args": { + "External id": 251972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918244091.417, "dur": 1.358, + "args": { + "External id": 251973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918244091.928, "dur": 0.757, + "args": { + "External id": 251974,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244096.911, "dur": 3.966, + "args": { + "External id": 251975,"Record function id": 0, "Ev Idx": 2118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244098.030, "dur": 2.443, + "args": { + "External id": 251976,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918244098.695, "dur": 1.359, + "args": { + "External id": 251977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918244099.205, "dur": 0.763, + "args": { + "External id": 251978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244103.982, "dur": 4.236, + "args": { + "External id": 251979,"Record function id": 0, "Ev Idx": 2122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244105.238, "dur": 2.595, + "args": { + "External id": 251980,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918244106.149, "dur": 1.244, + "args": { + "External id": 251981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918244106.569, "dur": 0.726, + "args": { + "External id": 251982,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244111.201, "dur": 5.212, + "args": { + "External id": 251983,"Record function id": 0, "Ev Idx": 2126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244112.505, "dur": 3.490, + "args": { + "External id": 251984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918244113.062, "dur": 2.525, + "args": { + "External id": 251985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918244114.711, "dur": 0.776, + "args": { + "External id": 251986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244119.407, "dur": 3.153, + "args": { + "External id": 251987,"Record function id": 0, "Ev Idx": 2130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244120.340, "dur": 1.820, + "args": { + "External id": 251988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918244120.792, "dur": 0.964, + "args": { + "External id": 251989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918244121.160, "dur": 0.523, + "args": { + "External id": 251990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244125.764, "dur": 3.238, + "args": { + "External id": 251991,"Record function id": 0, "Ev Idx": 2134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244126.605, "dur": 2.020, + "args": { + "External id": 251992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918244127.105, "dur": 1.115, + "args": { + "External id": 251993,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918244127.449, "dur": 0.700, + "args": { + "External id": 251994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244131.994, "dur": 5.315, + "args": { + "External id": 251995,"Record function id": 0, "Ev Idx": 2138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244133.038, "dur": 3.883, + "args": { + "External id": 251996,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918244133.489, "dur": 3.017, + "args": { + "External id": 251997,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918244133.846, "dur": 2.587, + "args": { + "External id": 251998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244140.247, "dur": 5.468, + "args": { + "External id": 251999,"Record function id": 0, "Ev Idx": 2142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918244141.129, "dur": 4.185, + "args": { + "External id": 252000,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918244141.691, "dur": 3.209, + "args": { + "External id": 252001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918244144.045, "dur": 0.760, + "args": { + "External id": 252002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918244149.360, "dur": 17631.728, + "args": { + "External id": 252003,"Record function id": 0, "Sequence number": 2987565, "Fwd thread id": 1, "Ev Idx": 2146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918244150.769, "dur": 17621.585, + "args": { + "External id": 252004,"Sequence number": 2987565, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 2147 + } + }, + { + "ph": "f", "id": 179, "pid": 4183438, "tid": 31367, "ts": 667918244150.769, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 4183438, "tid": 31367, + "ts": 667918244179.414, "dur": 58.285, + "args": { + "External id": 252005,"Record function id": 0, "Ev Idx": 2148 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 4183438, "tid": 31367, + "ts": 667918244246.505, "dur": 60.909, + "args": { + "External id": 252006,"Record function id": 0, "Ev Idx": 2149 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 4183438, "tid": 31367, + "ts": 667918244314.199, "dur": 17449.927, + "args": { + "External id": 252007,"Record function id": 0, "Ev Idx": 2150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918244404.055, "dur": 6.701, + "args": { + "External id": 252008,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918244421.134, "dur": 5.281, + "args": { + "External id": 252009,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 2152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918244444.056, "dur": 16521.405, + "args": { + "External id": 252010,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 2153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918244456.998, "dur": 16499.783, + "args": { + "External id": 252011,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 2154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918244480.949, "dur": 13.934, + "args": { + "External id": 252012,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918244502.815, "dur": 16419.393, + "args": { + "External id": 252013,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 2156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918244506.315, "dur": 16415.168, + "args": { + "External id": 252014,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 2157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918244510.574, "dur": 4.422, + "args": { + "External id": 252015,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918244516.530, "dur": 16401.200, + "args": { + "External id": 252016,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 2159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918261055.855, "dur": 8.439, + "args": { + "External id": 252017,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 2160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918261058.545, "dur": 5.396, + "args": { + "External id": 252018,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918261095.122, "dur": 342.634, + "args": { + "External id": 252019,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 2162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918261125.917, "dur": 307.128, + "args": { + "External id": 252020,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2163, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918261136.777, "dur": 288.643, + "args": { + "External id": 252021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 2164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918261459.908, "dur": 2.434, + "args": { + "External id": 252022,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2165, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918261520.538, "dur": 6.665, + "args": { + "External id": 252023,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918261572.874, "dur": 1.644, + "args": { + "External id": 252024,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918261589.546, "dur": 1.208, + "args": { + "External id": 252025,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918261601.704, "dur": 0.887, + "args": { + "External id": 252026,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918261614.776, "dur": 0.783, + "args": { + "External id": 252027,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918261627.226, "dur": 1.240, + "args": { + "External id": 252028,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918261638.989, "dur": 0.816, + "args": { + "External id": 252029,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918261684.716, "dur": 2.485, + "args": { + "External id": 252030,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918261702.552, "dur": 0.931, + "args": { + "External id": 252031,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918261796.468, "dur": 2698.823, + "args": { + "External id": 252032,"Record function id": 0, "Ev Idx": 2175 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 4183438, "tid": 31367, + "ts": 667918261814.086, "dur": 1015.889, + "args": { + "External id": 252033,"Record function id": 0, "Ev Idx": 2176 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 4183438, "tid": 31367, + "ts": 667918261828.384, "dur": 301.533, + "args": { + "External id": 252034,"Record function id": 0, "Ev Idx": 2177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918261907.509, "dur": 3.886, + "args": { + "External id": 252035,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 2178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918261914.702, "dur": 0.928, + "args": { + "External id": 252036,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 2179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918261917.543, "dur": 0.889, + "args": { + "External id": 252037,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918261920.270, "dur": 0.887, + "args": { + "External id": 252038,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918261922.690, "dur": 0.796, + "args": { + "External id": 252039,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918261926.461, "dur": 0.922, + "args": { + "External id": 252040,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918261929.201, "dur": 1.702, + "args": { + "External id": 252041,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 2184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918261932.456, "dur": 2.481, + "args": { + "External id": 252042,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918261936.665, "dur": 0.998, + "args": { + "External id": 252043,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918261940.526, "dur": 0.873, + "args": { + "External id": 252044,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918261959.635, "dur": 143.147, + "args": { + "External id": 252045,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 2188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918261975.418, "dur": 123.235, + "args": { + "External id": 252046,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 2189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918261989.752, "dur": 13.413, + "args": { + "External id": 252047,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918262005.691, "dur": 65.356, + "args": { + "External id": 252048,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 2191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918262009.526, "dur": 61.229, + "args": { + "External id": 252049,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 2192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262013.982, "dur": 5.719, + "args": { + "External id": 252050,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918262021.474, "dur": 48.584, + "args": { + "External id": 252051,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 2194 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 4183438, "tid": 31367, + "ts": 667918262230.633, "dur": 591.130, + "args": { + "External id": 252052,"Record function id": 0, "Ev Idx": 2195 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 4183438, "tid": 31367, + "ts": 667918262247.392, "dur": 561.559, + "args": { + "External id": 252053,"Record function id": 0, "Ev Idx": 2196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918262304.100, "dur": 5.404, + "args": { + "External id": 252054,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918262325.453, "dur": 30.088, + "args": { + "External id": 252055,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 2198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262330.622, "dur": 1.522, + "args": { + "External id": 252056,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262335.027, "dur": 0.321, + "args": { + "External id": 252057,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262336.617, "dur": 2.805, + "args": { + "External id": 252058,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262340.433, "dur": 0.425, + "args": { + "External id": 252059,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262343.112, "dur": 0.348, + "args": { + "External id": 252060,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262344.843, "dur": 0.370, + "args": { + "External id": 252061,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262346.386, "dur": 1.211, + "args": { + "External id": 252062,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262348.945, "dur": 0.421, + "args": { + "External id": 252063,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262350.931, "dur": 0.321, + "args": { + "External id": 252064,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918262367.617, "dur": 32.479, + "args": { + "External id": 252065,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 2208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918262430.106, "dur": 98.035, + "args": { + "External id": 252066,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 2209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918262439.285, "dur": 3.534, + "args": { + "External id": 252067,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918262448.135, "dur": 11.552, + "args": { + "External id": 252068,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 2211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918262451.903, "dur": 7.374, + "args": { + "External id": 252069,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 2212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262455.689, "dur": 2.493, + "args": { + "External id": 252070,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 2213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918262465.926, "dur": 26.032, + "args": { + "External id": 252071,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 2214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262467.855, "dur": 1.330, + "args": { + "External id": 252072,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262470.666, "dur": 0.424, + "args": { + "External id": 252073,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262472.469, "dur": 0.553, + "args": { + "External id": 252074,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262474.860, "dur": 0.633, + "args": { + "External id": 252075,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262477.089, "dur": 0.282, + "args": { + "External id": 252076,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262478.692, "dur": 0.397, + "args": { + "External id": 252077,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262481.732, "dur": 0.320, + "args": { + "External id": 252078,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262483.486, "dur": 2.201, + "args": { + "External id": 252079,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918262487.096, "dur": 1.124, + "args": { + "External id": 252080,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918262502.107, "dur": 19.470, + "args": { + "External id": 252081,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 2224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918262570.894, "dur": 163.184, + "args": { + "External id": 252082,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 2225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918262597.402, "dur": 132.650, + "args": { + "External id": 252083,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2226, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918262606.778, "dur": 117.961, + "args": { + "External id": 252084,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 2227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918262753.171, "dur": 2.090, + "args": { + "External id": 252085,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2228, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918262837.370, "dur": 1637.184, + "args": { + "External id": 252086,"Sequence number": 2987564, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 2229 + } + }, + { + "ph": "f", "id": 180, "pid": 4183438, "tid": 31367, "ts": 667918262837.370, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918262946.753, "dur": 101.175, + "args": { + "External id": 252087,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 2230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918263087.621, "dur": 36.990, + "args": { + "External id": 252088,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 2231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918263142.893, "dur": 41.707, + "args": { + "External id": 252089,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 2232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918263216.844, "dur": 34.859, + "args": { + "External id": 252090,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 2233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918263259.073, "dur": 33.463, + "args": { + "External id": 252091,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 2234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918263299.345, "dur": 24.624, + "args": { + "External id": 252092,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 2235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918263332.440, "dur": 32.753, + "args": { + "External id": 252093,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 2236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918263391.243, "dur": 25.177, + "args": { + "External id": 252094,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 2237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918263435.379, "dur": 32.253, + "args": { + "External id": 252095,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918263486.429, "dur": 26.412, + "args": { + "External id": 252096,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 2239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918263526.350, "dur": 15.741, + "args": { + "External id": 252097,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 2240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918263551.327, "dur": 37.419, + "args": { + "External id": 252098,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918263591.783, "dur": 30.336, + "args": { + "External id": 252099,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918263649.646, "dur": 217.147, + "args": { + "External id": 252100,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918263769.707, "dur": 6.269, + "args": { + "External id": 252101,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918263778.283, "dur": 2.508, + "args": { + "External id": 252102,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918263900.692, "dur": 25.102, + "args": { + "External id": 252103,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918263937.096, "dur": 16.034, + "args": { + "External id": 252104,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918263962.568, "dur": 49.774, + "args": { + "External id": 252105,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918264018.237, "dur": 32.266, + "args": { + "External id": 252106,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918264065.417, "dur": 29.228, + "args": { + "External id": 252107,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918264099.989, "dur": 28.480, + "args": { + "External id": 252108,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918264135.650, "dur": 46.837, + "args": { + "External id": 252109,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918264217.522, "dur": 40.750, + "args": { + "External id": 252110,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918264283.983, "dur": 21.660, + "args": { + "External id": 252111,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 2254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918264325.378, "dur": 26.569, + "args": { + "External id": 252112,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918264365.680, "dur": 16.226, + "args": { + "External id": 252113,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 2256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918264397.731, "dur": 14.088, + "args": { + "External id": 252114,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 2257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918264422.911, "dur": 17.267, + "args": { + "External id": 252115,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 2258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264516.843, "dur": 18.020, + "args": { + "External id": 252116,"Record function id": 0, "Ev Idx": 2259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264519.658, "dur": 14.257, + "args": { + "External id": 252117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918264523.644, "dur": 9.451, + "args": { + "External id": 252118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918264529.050, "dur": 3.957, + "args": { + "External id": 252119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264538.670, "dur": 4.631, + "args": { + "External id": 252120,"Record function id": 0, "Ev Idx": 2263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264539.985, "dur": 2.898, + "args": { + "External id": 252121,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918264540.758, "dur": 1.556, + "args": { + "External id": 252122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918264541.518, "dur": 0.700, + "args": { + "External id": 252123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264546.476, "dur": 4.033, + "args": { + "External id": 252124,"Record function id": 0, "Ev Idx": 2267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264547.910, "dur": 2.191, + "args": { + "External id": 252125,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918264548.381, "dur": 1.269, + "args": { + "External id": 252126,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918264548.916, "dur": 0.662, + "args": { + "External id": 252127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264553.697, "dur": 3.707, + "args": { + "External id": 252128,"Record function id": 0, "Ev Idx": 2271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264554.922, "dur": 2.067, + "args": { + "External id": 252129,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918264555.428, "dur": 1.015, + "args": { + "External id": 252130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918264555.758, "dur": 0.612, + "args": { + "External id": 252131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264560.582, "dur": 4.123, + "args": { + "External id": 252132,"Record function id": 0, "Ev Idx": 2275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264561.654, "dur": 2.640, + "args": { + "External id": 252133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918264562.282, "dur": 1.467, + "args": { + "External id": 252134,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918264562.661, "dur": 1.014, + "args": { + "External id": 252135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264567.776, "dur": 3.875, + "args": { + "External id": 252136,"Record function id": 0, "Ev Idx": 2279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264568.913, "dur": 2.356, + "args": { + "External id": 252137,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918264569.544, "dur": 1.296, + "args": { + "External id": 252138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918264570.132, "dur": 0.635, + "args": { + "External id": 252139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264574.748, "dur": 6.632, + "args": { + "External id": 252140,"Record function id": 0, "Ev Idx": 2283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264575.920, "dur": 5.013, + "args": { + "External id": 252141,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918264576.370, "dur": 4.161, + "args": { + "External id": 252142,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918264577.771, "dur": 2.694, + "args": { + "External id": 252143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264584.391, "dur": 4.229, + "args": { + "External id": 252144,"Record function id": 0, "Ev Idx": 2287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264585.709, "dur": 2.523, + "args": { + "External id": 252145,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918264586.166, "dur": 1.460, + "args": { + "External id": 252146,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918264586.673, "dur": 0.840, + "args": { + "External id": 252147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264591.564, "dur": 3.269, + "args": { + "External id": 252148,"Record function id": 0, "Ev Idx": 2291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918264592.566, "dur": 1.861, + "args": { + "External id": 252149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918264593.042, "dur": 0.976, + "args": { + "External id": 252150,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918264593.355, "dur": 0.592, + "args": { + "External id": 252151,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918264600.165, "dur": 14751.385, + "args": { + "External id": 252152,"Record function id": 0, "Sequence number": 2987563, "Fwd thread id": 1, "Ev Idx": 2295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918264601.876, "dur": 14741.256, + "args": { + "External id": 252153,"Sequence number": 2987563, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 2296 + } + }, + { + "ph": "f", "id": 181, "pid": 4183438, "tid": 31367, "ts": 667918264601.876, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 4183438, "tid": 31367, + "ts": 667918264629.755, "dur": 75.179, + "args": { + "External id": 252154,"Record function id": 0, "Ev Idx": 2297 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 4183438, "tid": 31367, + "ts": 667918264714.741, "dur": 66.800, + "args": { + "External id": 252155,"Record function id": 0, "Ev Idx": 2298 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 4183438, "tid": 31367, + "ts": 667918264788.105, "dur": 14547.198, + "args": { + "External id": 252156,"Record function id": 0, "Ev Idx": 2299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918264880.238, "dur": 7.479, + "args": { + "External id": 252157,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918264901.135, "dur": 5.256, + "args": { + "External id": 252158,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 2301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918264919.549, "dur": 13702.310, + "args": { + "External id": 252159,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 2302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918264933.678, "dur": 13679.449, + "args": { + "External id": 252160,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 2303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918264959.047, "dur": 14.046, + "args": { + "External id": 252161,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918264978.051, "dur": 13600.180, + "args": { + "External id": 252162,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 2305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918264982.083, "dur": 13595.362, + "args": { + "External id": 252163,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 2306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918264985.973, "dur": 4.907, + "args": { + "External id": 252164,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918264992.494, "dur": 13581.070, + "args": { + "External id": 252165,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 2308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918278735.607, "dur": 9.432, + "args": { + "External id": 252166,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 2309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918278738.383, "dur": 6.180, + "args": { + "External id": 252167,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918278774.729, "dur": 262.059, + "args": { + "External id": 252168,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 2311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918278802.801, "dur": 229.805, + "args": { + "External id": 252169,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2312, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918278814.718, "dur": 213.342, + "args": { + "External id": 252170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 2313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918279054.091, "dur": 2.119, + "args": { + "External id": 252171,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2314, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279107.196, "dur": 8.400, + "args": { + "External id": 252172,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279158.919, "dur": 1.179, + "args": { + "External id": 252173,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279175.396, "dur": 1.305, + "args": { + "External id": 252174,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279189.651, "dur": 0.790, + "args": { + "External id": 252175,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279222.258, "dur": 3.142, + "args": { + "External id": 252176,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279235.899, "dur": 0.980, + "args": { + "External id": 252177,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279248.118, "dur": 1.080, + "args": { + "External id": 252178,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279261.124, "dur": 1.480, + "args": { + "External id": 252179,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279272.578, "dur": 2.585, + "args": { + "External id": 252180,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918279368.089, "dur": 2623.999, + "args": { + "External id": 252181,"Record function id": 0, "Ev Idx": 2324 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 4183438, "tid": 31367, + "ts": 667918279387.125, "dur": 993.556, + "args": { + "External id": 252182,"Record function id": 0, "Ev Idx": 2325 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 4183438, "tid": 31367, + "ts": 667918279400.782, "dur": 337.129, + "args": { + "External id": 252183,"Record function id": 0, "Ev Idx": 2326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918279473.951, "dur": 3.977, + "args": { + "External id": 252184,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 2327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918279481.292, "dur": 1.065, + "args": { + "External id": 252185,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 2328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918279484.116, "dur": 0.844, + "args": { + "External id": 252186,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918279486.804, "dur": 0.789, + "args": { + "External id": 252187,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918279489.344, "dur": 0.756, + "args": { + "External id": 252188,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918279491.749, "dur": 0.691, + "args": { + "External id": 252189,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918279494.353, "dur": 3.088, + "args": { + "External id": 252190,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 2333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918279499.139, "dur": 0.757, + "args": { + "External id": 252191,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918279501.660, "dur": 0.646, + "args": { + "External id": 252192,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918279504.148, "dur": 0.572, + "args": { + "External id": 252193,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918279523.004, "dur": 183.214, + "args": { + "External id": 252194,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 2337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918279538.583, "dur": 162.493, + "args": { + "External id": 252195,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 2338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918279554.629, "dur": 13.246, + "args": { + "External id": 252196,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918279570.767, "dur": 62.841, + "args": { + "External id": 252197,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 2340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918279573.455, "dur": 59.831, + "args": { + "External id": 252198,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 2341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279577.770, "dur": 7.035, + "args": { + "External id": 252199,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918279586.465, "dur": 46.241, + "args": { + "External id": 252200,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 2343 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 4183438, "tid": 31367, + "ts": 667918279820.107, "dur": 553.453, + "args": { + "External id": 252201,"Record function id": 0, "Ev Idx": 2344 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 4183438, "tid": 31367, + "ts": 667918279836.113, "dur": 524.824, + "args": { + "External id": 252202,"Record function id": 0, "Ev Idx": 2345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918279894.843, "dur": 5.699, + "args": { + "External id": 252203,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918279916.079, "dur": 29.210, + "args": { + "External id": 252204,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 2347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279920.556, "dur": 2.614, + "args": { + "External id": 252205,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279925.255, "dur": 2.047, + "args": { + "External id": 252206,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279928.576, "dur": 0.374, + "args": { + "External id": 252207,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279931.155, "dur": 0.348, + "args": { + "External id": 252208,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279932.693, "dur": 0.377, + "args": { + "External id": 252209,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279934.830, "dur": 0.569, + "args": { + "External id": 252210,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279937.347, "dur": 0.309, + "args": { + "External id": 252211,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279938.770, "dur": 0.287, + "args": { + "External id": 252212,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918279940.087, "dur": 1.037, + "args": { + "External id": 252213,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918279957.798, "dur": 31.271, + "args": { + "External id": 252214,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 2357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918280019.864, "dur": 99.035, + "args": { + "External id": 252215,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 2358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918280029.529, "dur": 5.684, + "args": { + "External id": 252216,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918280040.312, "dur": 10.394, + "args": { + "External id": 252217,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 2360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918280044.660, "dur": 5.619, + "args": { + "External id": 252218,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 2361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918280048.553, "dur": 0.581, + "args": { + "External id": 252219,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 2362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918280057.811, "dur": 24.608, + "args": { + "External id": 252220,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 2363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918280059.828, "dur": 0.844, + "args": { + "External id": 252221,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918280062.866, "dur": 1.143, + "args": { + "External id": 252222,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918280065.130, "dur": 0.508, + "args": { + "External id": 252223,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918280067.179, "dur": 0.398, + "args": { + "External id": 252224,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918280069.447, "dur": 0.281, + "args": { + "External id": 252225,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918280070.887, "dur": 0.730, + "args": { + "External id": 252226,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918280072.905, "dur": 1.964, + "args": { + "External id": 252227,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918280076.653, "dur": 0.398, + "args": { + "External id": 252228,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918280078.414, "dur": 0.368, + "args": { + "External id": 252229,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918280093.176, "dur": 18.907, + "args": { + "External id": 252230,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 2373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918280161.771, "dur": 132.633, + "args": { + "External id": 252231,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 2374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918280184.006, "dur": 106.734, + "args": { + "External id": 252232,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2375, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918280208.286, "dur": 78.088, + "args": { + "External id": 252233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 2376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918280307.984, "dur": 1.993, + "args": { + "External id": 252234,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2377, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918280388.341, "dur": 1583.184, + "args": { + "External id": 252235,"Sequence number": 2987562, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 2378 + } + }, + { + "ph": "f", "id": 182, "pid": 4183438, "tid": 31367, "ts": 667918280388.341, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918280498.100, "dur": 101.053, + "args": { + "External id": 252236,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 2379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918280637.089, "dur": 78.865, + "args": { + "External id": 252237,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 2380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918280737.300, "dur": 50.419, + "args": { + "External id": 252238,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 2381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918280797.884, "dur": 27.380, + "args": { + "External id": 252239,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 2382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918280831.131, "dur": 34.310, + "args": { + "External id": 252240,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 2383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918280873.511, "dur": 21.077, + "args": { + "External id": 252241,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 2384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918280901.607, "dur": 29.560, + "args": { + "External id": 252242,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 2385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918280953.805, "dur": 26.966, + "args": { + "External id": 252243,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 2386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918281003.857, "dur": 32.209, + "args": { + "External id": 252244,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918281053.827, "dur": 19.709, + "args": { + "External id": 252245,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 2388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918281083.649, "dur": 14.483, + "args": { + "External id": 252246,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 2389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918281107.013, "dur": 39.088, + "args": { + "External id": 252247,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918281149.036, "dur": 29.863, + "args": { + "External id": 252248,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918281224.236, "dur": 172.434, + "args": { + "External id": 252249,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918281302.669, "dur": 6.968, + "args": { + "External id": 252250,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918281311.660, "dur": 4.314, + "args": { + "External id": 252251,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918281426.476, "dur": 22.610, + "args": { + "External id": 252252,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918281459.487, "dur": 14.250, + "args": { + "External id": 252253,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918281481.551, "dur": 46.783, + "args": { + "External id": 252254,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918281534.107, "dur": 30.493, + "args": { + "External id": 252255,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918281570.303, "dur": 27.041, + "args": { + "External id": 252256,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918281603.283, "dur": 28.150, + "args": { + "External id": 252257,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918281636.611, "dur": 63.594, + "args": { + "External id": 252258,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918281710.276, "dur": 31.486, + "args": { + "External id": 252259,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918281760.526, "dur": 36.747, + "args": { + "External id": 252260,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 2403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918281820.597, "dur": 28.819, + "args": { + "External id": 252261,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918281863.391, "dur": 15.286, + "args": { + "External id": 252262,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 2405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918281893.632, "dur": 16.639, + "args": { + "External id": 252263,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 2406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918281924.196, "dur": 15.606, + "args": { + "External id": 252264,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 2407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282014.632, "dur": 15.262, + "args": { + "External id": 252265,"Record function id": 0, "Ev Idx": 2408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282018.341, "dur": 10.706, + "args": { + "External id": 252266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918282022.450, "dur": 5.645, + "args": { + "External id": 252267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918282023.918, "dur": 4.091, + "args": { + "External id": 252268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282033.683, "dur": 4.766, + "args": { + "External id": 252269,"Record function id": 0, "Ev Idx": 2412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282035.575, "dur": 2.422, + "args": { + "External id": 252270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918282036.168, "dur": 1.283, + "args": { + "External id": 252271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918282036.602, "dur": 0.754, + "args": { + "External id": 252272,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282041.635, "dur": 4.211, + "args": { + "External id": 252273,"Record function id": 0, "Ev Idx": 2416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282043.179, "dur": 2.267, + "args": { + "External id": 252274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918282043.794, "dur": 1.192, + "args": { + "External id": 252275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918282044.213, "dur": 0.682, + "args": { + "External id": 252276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282049.033, "dur": 5.660, + "args": { + "External id": 252277,"Record function id": 0, "Ev Idx": 2420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282049.965, "dur": 4.304, + "args": { + "External id": 252278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918282050.486, "dur": 3.370, + "args": { + "External id": 252279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918282051.352, "dur": 2.430, + "args": { + "External id": 252280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282057.901, "dur": 4.430, + "args": { + "External id": 252281,"Record function id": 0, "Ev Idx": 2424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282059.212, "dur": 2.735, + "args": { + "External id": 252282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918282059.715, "dur": 1.791, + "args": { + "External id": 252283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918282060.637, "dur": 0.801, + "args": { + "External id": 252284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282065.512, "dur": 4.101, + "args": { + "External id": 252285,"Record function id": 0, "Ev Idx": 2428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282066.875, "dur": 2.309, + "args": { + "External id": 252286,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918282067.355, "dur": 1.396, + "args": { + "External id": 252287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918282067.902, "dur": 0.775, + "args": { + "External id": 252288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282072.776, "dur": 3.895, + "args": { + "External id": 252289,"Record function id": 0, "Ev Idx": 2432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282073.877, "dur": 2.407, + "args": { + "External id": 252290,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918282074.371, "dur": 1.470, + "args": { + "External id": 252291,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918282075.084, "dur": 0.682, + "args": { + "External id": 252292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282079.876, "dur": 4.327, + "args": { + "External id": 252293,"Record function id": 0, "Ev Idx": 2436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282081.344, "dur": 2.453, + "args": { + "External id": 252294,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918282081.932, "dur": 1.419, + "args": { + "External id": 252295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918282082.550, "dur": 0.728, + "args": { + "External id": 252296,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282087.387, "dur": 4.377, + "args": { + "External id": 252297,"Record function id": 0, "Ev Idx": 2440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918282088.875, "dur": 2.495, + "args": { + "External id": 252298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918282089.627, "dur": 1.338, + "args": { + "External id": 252299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918282090.297, "dur": 0.595, + "args": { + "External id": 252300,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918282095.189, "dur": 14921.638, + "args": { + "External id": 252301,"Record function id": 0, "Sequence number": 2987561, "Fwd thread id": 1, "Ev Idx": 2444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918282096.543, "dur": 14912.120, + "args": { + "External id": 252302,"Sequence number": 2987561, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 2445 + } + }, + { + "ph": "f", "id": 183, "pid": 4183438, "tid": 31367, "ts": 667918282096.543, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 4183438, "tid": 31367, + "ts": 667918282125.194, "dur": 38.487, + "args": { + "External id": 252303,"Record function id": 0, "Ev Idx": 2446 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 4183438, "tid": 31367, + "ts": 667918282174.611, "dur": 85.845, + "args": { + "External id": 252304,"Record function id": 0, "Ev Idx": 2447 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 4183438, "tid": 31367, + "ts": 667918282268.620, "dur": 14732.372, + "args": { + "External id": 252305,"Record function id": 0, "Ev Idx": 2448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918282359.633, "dur": 7.027, + "args": { + "External id": 252306,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918282376.201, "dur": 5.316, + "args": { + "External id": 252307,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 2450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918282396.505, "dur": 13902.513, + "args": { + "External id": 252308,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 2451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918282413.571, "dur": 13876.561, + "args": { + "External id": 252309,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 2452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918282435.207, "dur": 16.078, + "args": { + "External id": 252310,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918282455.912, "dur": 13797.805, + "args": { + "External id": 252311,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 2454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918282458.449, "dur": 13794.543, + "args": { + "External id": 252312,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 2455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918282463.171, "dur": 8.014, + "args": { + "External id": 252313,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918282473.301, "dur": 13776.282, + "args": { + "External id": 252314,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 2457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918296389.194, "dur": 9.071, + "args": { + "External id": 252315,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 2458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918296392.055, "dur": 5.861, + "args": { + "External id": 252316,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918296424.922, "dur": 305.583, + "args": { + "External id": 252317,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 2460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918296455.721, "dur": 270.180, + "args": { + "External id": 252318,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2461, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918296468.677, "dur": 251.623, + "args": { + "External id": 252319,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 2462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918296749.190, "dur": 2.017, + "args": { + "External id": 252320,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2463, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918296806.361, "dur": 6.293, + "args": { + "External id": 252321,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918296854.706, "dur": 1.462, + "args": { + "External id": 252322,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918296871.257, "dur": 1.251, + "args": { + "External id": 252323,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918296884.978, "dur": 0.934, + "args": { + "External id": 252324,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918296896.858, "dur": 0.670, + "args": { + "External id": 252325,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918296907.910, "dur": 0.881, + "args": { + "External id": 252326,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918296918.534, "dur": 0.715, + "args": { + "External id": 252327,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918296930.500, "dur": 1.619, + "args": { + "External id": 252328,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918296940.624, "dur": 0.736, + "args": { + "External id": 252329,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918297031.405, "dur": 2604.596, + "args": { + "External id": 252330,"Record function id": 0, "Ev Idx": 2473 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 4183438, "tid": 31367, + "ts": 667918297050.075, "dur": 979.990, + "args": { + "External id": 252331,"Record function id": 0, "Ev Idx": 2474 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 4183438, "tid": 31367, + "ts": 667918297063.658, "dur": 317.620, + "args": { + "External id": 252332,"Record function id": 0, "Ev Idx": 2475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918297136.500, "dur": 4.064, + "args": { + "External id": 252333,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 2476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918297143.569, "dur": 0.978, + "args": { + "External id": 252334,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 2477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918297146.451, "dur": 0.900, + "args": { + "External id": 252335,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918297148.889, "dur": 2.582, + "args": { + "External id": 252336,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918297152.846, "dur": 0.823, + "args": { + "External id": 252337,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918297154.978, "dur": 0.785, + "args": { + "External id": 252338,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918297157.004, "dur": 1.698, + "args": { + "External id": 252339,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 2482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918297160.304, "dur": 0.794, + "args": { + "External id": 252340,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918297162.541, "dur": 0.686, + "args": { + "External id": 252341,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918297164.433, "dur": 1.131, + "args": { + "External id": 252342,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918297182.037, "dur": 168.568, + "args": { + "External id": 252343,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 2486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918297222.195, "dur": 123.697, + "args": { + "External id": 252344,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 2487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918297236.416, "dur": 13.593, + "args": { + "External id": 252345,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918297252.789, "dur": 66.377, + "args": { + "External id": 252346,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 2489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918297255.124, "dur": 63.699, + "args": { + "External id": 252347,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 2490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297259.135, "dur": 8.078, + "args": { + "External id": 252348,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918297268.821, "dur": 49.351, + "args": { + "External id": 252349,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 2492 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 4183438, "tid": 31367, + "ts": 667918297458.880, "dur": 564.032, + "args": { + "External id": 252350,"Record function id": 0, "Ev Idx": 2493 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 4183438, "tid": 31367, + "ts": 667918297475.230, "dur": 535.531, + "args": { + "External id": 252351,"Record function id": 0, "Ev Idx": 2494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918297528.934, "dur": 4.986, + "args": { + "External id": 252352,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918297548.950, "dur": 27.037, + "args": { + "External id": 252353,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 2496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297553.265, "dur": 1.567, + "args": { + "External id": 252354,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297556.869, "dur": 0.401, + "args": { + "External id": 252355,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297558.491, "dur": 0.330, + "args": { + "External id": 252356,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297560.752, "dur": 0.382, + "args": { + "External id": 252357,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297562.058, "dur": 0.462, + "args": { + "External id": 252358,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297563.610, "dur": 0.989, + "args": { + "External id": 252359,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297565.936, "dur": 1.991, + "args": { + "External id": 252360,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297569.026, "dur": 0.356, + "args": { + "External id": 252361,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297571.201, "dur": 0.468, + "args": { + "External id": 252362,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918297587.732, "dur": 29.094, + "args": { + "External id": 252363,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 2506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918297646.235, "dur": 145.067, + "args": { + "External id": 252364,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 2507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918297697.561, "dur": 5.285, + "args": { + "External id": 252365,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918297708.938, "dur": 10.956, + "args": { + "External id": 252366,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 2509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918297713.239, "dur": 6.225, + "args": { + "External id": 252367,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 2510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297717.038, "dur": 0.764, + "args": { + "External id": 252368,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 2511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918297727.285, "dur": 22.764, + "args": { + "External id": 252369,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 2512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297729.396, "dur": 0.421, + "args": { + "External id": 252370,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297731.328, "dur": 0.382, + "args": { + "External id": 252371,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297733.953, "dur": 0.418, + "args": { + "External id": 252372,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297735.795, "dur": 1.624, + "args": { + "External id": 252373,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297738.722, "dur": 0.418, + "args": { + "External id": 252374,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297741.022, "dur": 0.348, + "args": { + "External id": 252375,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297742.686, "dur": 0.325, + "args": { + "External id": 252376,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297744.088, "dur": 1.056, + "args": { + "External id": 252377,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918297746.510, "dur": 0.319, + "args": { + "External id": 252378,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918297760.555, "dur": 22.593, + "args": { + "External id": 252379,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 2522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918297836.346, "dur": 110.780, + "args": { + "External id": 252380,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 2523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918297858.778, "dur": 84.878, + "args": { + "External id": 252381,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2524, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918297868.104, "dur": 71.433, + "args": { + "External id": 252382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 2525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918297959.811, "dur": 2.011, + "args": { + "External id": 252383,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2526, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918298036.475, "dur": 1578.075, + "args": { + "External id": 252384,"Sequence number": 2987560, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 2527 + } + }, + { + "ph": "f", "id": 184, "pid": 4183438, "tid": 31367, "ts": 667918298036.475, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918298144.865, "dur": 121.848, + "args": { + "External id": 252385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 2528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918298310.439, "dur": 36.422, + "args": { + "External id": 252386,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 2529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918298363.536, "dur": 43.570, + "args": { + "External id": 252387,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 2530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918298416.113, "dur": 25.345, + "args": { + "External id": 252388,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 2531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918298449.380, "dur": 32.743, + "args": { + "External id": 252389,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 2532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918298488.798, "dur": 21.710, + "args": { + "External id": 252390,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 2533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918298517.169, "dur": 29.819, + "args": { + "External id": 252391,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 2534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918298568.945, "dur": 24.525, + "args": { + "External id": 252392,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 2535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918298611.325, "dur": 28.970, + "args": { + "External id": 252393,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918298700.889, "dur": 23.324, + "args": { + "External id": 252394,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 2537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918298737.981, "dur": 15.905, + "args": { + "External id": 252395,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 2538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918298763.615, "dur": 44.866, + "args": { + "External id": 252396,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918298812.359, "dur": 31.209, + "args": { + "External id": 252397,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918298876.236, "dur": 173.930, + "args": { + "External id": 252398,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918298954.831, "dur": 6.372, + "args": { + "External id": 252399,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918298963.026, "dur": 3.023, + "args": { + "External id": 252400,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918299081.587, "dur": 25.441, + "args": { + "External id": 252401,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918299117.951, "dur": 15.599, + "args": { + "External id": 252402,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918299141.518, "dur": 45.697, + "args": { + "External id": 252403,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918299211.738, "dur": 39.117, + "args": { + "External id": 252404,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918299260.642, "dur": 28.500, + "args": { + "External id": 252405,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918299293.897, "dur": 28.418, + "args": { + "External id": 252406,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918299328.455, "dur": 27.167, + "args": { + "External id": 252407,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918299362.044, "dur": 28.268, + "args": { + "External id": 252408,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918299407.550, "dur": 21.829, + "args": { + "External id": 252409,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 2552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918299445.571, "dur": 23.192, + "args": { + "External id": 252410,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918299480.906, "dur": 15.149, + "args": { + "External id": 252411,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 2554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918299526.657, "dur": 22.005, + "args": { + "External id": 252412,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 2555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918299566.306, "dur": 18.926, + "args": { + "External id": 252413,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 2556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299703.802, "dur": 45.867, + "args": { + "External id": 252414,"Record function id": 0, "Ev Idx": 2557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299709.956, "dur": 38.008, + "args": { + "External id": 252415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918299714.684, "dur": 30.949, + "args": { + "External id": 252416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918299740.594, "dur": 4.650, + "args": { + "External id": 252417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299754.647, "dur": 3.988, + "args": { + "External id": 252418,"Record function id": 0, "Ev Idx": 2561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299755.987, "dur": 2.240, + "args": { + "External id": 252419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918299756.590, "dur": 1.163, + "args": { + "External id": 252420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918299756.956, "dur": 0.706, + "args": { + "External id": 252421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299761.777, "dur": 4.945, + "args": { + "External id": 252422,"Record function id": 0, "Ev Idx": 2565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299762.822, "dur": 3.485, + "args": { + "External id": 252423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918299763.320, "dur": 2.575, + "args": { + "External id": 252424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918299763.669, "dur": 2.140, + "args": { + "External id": 252425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299769.768, "dur": 3.829, + "args": { + "External id": 252426,"Record function id": 0, "Ev Idx": 2569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299770.923, "dur": 2.297, + "args": { + "External id": 252427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918299771.431, "dur": 1.403, + "args": { + "External id": 252428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918299771.988, "dur": 0.754, + "args": { + "External id": 252429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299776.755, "dur": 3.395, + "args": { + "External id": 252430,"Record function id": 0, "Ev Idx": 2573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299777.821, "dur": 1.941, + "args": { + "External id": 252431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918299778.353, "dur": 0.954, + "args": { + "External id": 252432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918299778.671, "dur": 0.566, + "args": { + "External id": 252433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299783.144, "dur": 3.529, + "args": { + "External id": 252434,"Record function id": 0, "Ev Idx": 2577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299784.195, "dur": 2.019, + "args": { + "External id": 252435,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918299784.767, "dur": 0.897, + "args": { + "External id": 252436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918299785.045, "dur": 0.545, + "args": { + "External id": 252437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299789.754, "dur": 6.501, + "args": { + "External id": 252438,"Record function id": 0, "Ev Idx": 2581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299790.757, "dur": 5.056, + "args": { + "External id": 252439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918299791.240, "dur": 4.171, + "args": { + "External id": 252440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918299791.526, "dur": 3.782, + "args": { + "External id": 252441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299799.206, "dur": 3.595, + "args": { + "External id": 252442,"Record function id": 0, "Ev Idx": 2585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299800.255, "dur": 2.147, + "args": { + "External id": 252443,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918299800.739, "dur": 1.256, + "args": { + "External id": 252444,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918299801.364, "dur": 0.529, + "args": { + "External id": 252445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299805.603, "dur": 3.434, + "args": { + "External id": 252446,"Record function id": 0, "Ev Idx": 2589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918299806.539, "dur": 2.071, + "args": { + "External id": 252447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918299807.032, "dur": 1.192, + "args": { + "External id": 252448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918299807.570, "dur": 0.563, + "args": { + "External id": 252449,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918299812.614, "dur": 15101.221, + "args": { + "External id": 252450,"Record function id": 0, "Sequence number": 2987559, "Fwd thread id": 1, "Ev Idx": 2593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918299813.652, "dur": 15090.502, + "args": { + "External id": 252451,"Sequence number": 2987559, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 2594 + } + }, + { + "ph": "f", "id": 185, "pid": 4183438, "tid": 31367, "ts": 667918299813.652, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 4183438, "tid": 31367, + "ts": 667918299844.988, "dur": 46.237, + "args": { + "External id": 252452,"Record function id": 0, "Ev Idx": 2595 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 4183438, "tid": 31367, + "ts": 667918299898.661, "dur": 67.995, + "args": { + "External id": 252453,"Record function id": 0, "Ev Idx": 2596 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 4183438, "tid": 31367, + "ts": 667918299973.281, "dur": 14922.819, + "args": { + "External id": 252454,"Record function id": 0, "Ev Idx": 2597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918300069.320, "dur": 7.482, + "args": { + "External id": 252455,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918300086.030, "dur": 6.101, + "args": { + "External id": 252456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 2599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918300108.016, "dur": 14021.518, + "args": { + "External id": 252457,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 2600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918300121.054, "dur": 13999.896, + "args": { + "External id": 252458,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 2601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918300142.684, "dur": 17.725, + "args": { + "External id": 252459,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918300165.108, "dur": 13921.806, + "args": { + "External id": 252460,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 2603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918300167.510, "dur": 13918.656, + "args": { + "External id": 252461,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 2604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918300171.321, "dur": 4.935, + "args": { + "External id": 252462,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918300177.834, "dur": 13904.630, + "args": { + "External id": 252463,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 2606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918314230.961, "dur": 9.853, + "args": { + "External id": 252464,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 2607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918314233.758, "dur": 6.503, + "args": { + "External id": 252465,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918314267.843, "dur": 317.218, + "args": { + "External id": 252466,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 2609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918314295.374, "dur": 284.990, + "args": { + "External id": 252467,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2610, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918314307.387, "dur": 268.097, + "args": { + "External id": 252468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 2611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918314603.275, "dur": 2.231, + "args": { + "External id": 252469,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2612, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918314692.648, "dur": 6.754, + "args": { + "External id": 252470,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918314745.053, "dur": 1.275, + "args": { + "External id": 252471,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918314762.205, "dur": 3.523, + "args": { + "External id": 252472,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918314776.870, "dur": 0.825, + "args": { + "External id": 252473,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918314789.332, "dur": 0.795, + "args": { + "External id": 252474,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918314799.737, "dur": 0.865, + "args": { + "External id": 252475,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918314811.508, "dur": 2.653, + "args": { + "External id": 252476,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918314824.984, "dur": 2.096, + "args": { + "External id": 252477,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918314836.593, "dur": 0.818, + "args": { + "External id": 252478,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918314929.987, "dur": 2639.692, + "args": { + "External id": 252479,"Record function id": 0, "Ev Idx": 2622 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 4183438, "tid": 31367, + "ts": 667918314948.337, "dur": 992.489, + "args": { + "External id": 252480,"Record function id": 0, "Ev Idx": 2623 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 4183438, "tid": 31367, + "ts": 667918314962.957, "dur": 323.552, + "args": { + "External id": 252481,"Record function id": 0, "Ev Idx": 2624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918315039.292, "dur": 4.341, + "args": { + "External id": 252482,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 2625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918315046.784, "dur": 1.038, + "args": { + "External id": 252483,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 2626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918315049.529, "dur": 3.327, + "args": { + "External id": 252484,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918315054.773, "dur": 0.858, + "args": { + "External id": 252485,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918315057.639, "dur": 0.912, + "args": { + "External id": 252486,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918315060.340, "dur": 1.221, + "args": { + "External id": 252487,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918315063.103, "dur": 1.812, + "args": { + "External id": 252488,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 2631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918315066.472, "dur": 0.719, + "args": { + "External id": 252489,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918315068.800, "dur": 0.749, + "args": { + "External id": 252490,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918315071.210, "dur": 0.623, + "args": { + "External id": 252491,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918315091.202, "dur": 162.768, + "args": { + "External id": 252492,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 2635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918315108.464, "dur": 141.128, + "args": { + "External id": 252493,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 2636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918315121.415, "dur": 15.118, + "args": { + "External id": 252494,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918315139.258, "dur": 82.609, + "args": { + "External id": 252495,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 2638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918315141.931, "dur": 79.529, + "args": { + "External id": 252496,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 2639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315146.338, "dur": 5.220, + "args": { + "External id": 252497,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918315153.641, "dur": 66.760, + "args": { + "External id": 252498,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 2641 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 4183438, "tid": 31367, + "ts": 667918315365.991, "dur": 567.728, + "args": { + "External id": 252499,"Record function id": 0, "Ev Idx": 2642 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 4183438, "tid": 31367, + "ts": 667918315381.938, "dur": 539.804, + "args": { + "External id": 252500,"Record function id": 0, "Ev Idx": 2643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918315436.538, "dur": 5.628, + "args": { + "External id": 252501,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918315458.264, "dur": 27.515, + "args": { + "External id": 252502,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 2645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315462.787, "dur": 1.808, + "args": { + "External id": 252503,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315466.690, "dur": 0.560, + "args": { + "External id": 252504,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315469.484, "dur": 0.430, + "args": { + "External id": 252505,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315470.744, "dur": 0.593, + "args": { + "External id": 252506,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315472.382, "dur": 0.490, + "args": { + "External id": 252507,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315473.975, "dur": 2.184, + "args": { + "External id": 252508,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315477.764, "dur": 0.526, + "args": { + "External id": 252509,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315479.792, "dur": 0.332, + "args": { + "External id": 252510,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315481.554, "dur": 0.483, + "args": { + "External id": 252511,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918315496.130, "dur": 29.863, + "args": { + "External id": 252512,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 2655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918315557.765, "dur": 92.385, + "args": { + "External id": 252513,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 2656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918315567.393, "dur": 3.291, + "args": { + "External id": 252514,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918315575.667, "dur": 9.847, + "args": { + "External id": 252515,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 2658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918315579.716, "dur": 5.398, + "args": { + "External id": 252516,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 2659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315583.304, "dur": 0.604, + "args": { + "External id": 252517,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 2660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918315591.971, "dur": 22.707, + "args": { + "External id": 252518,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 2661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315593.596, "dur": 0.631, + "args": { + "External id": 252519,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315595.923, "dur": 0.388, + "args": { + "External id": 252520,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315597.992, "dur": 2.746, + "args": { + "External id": 252521,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315602.263, "dur": 0.453, + "args": { + "External id": 252522,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315604.051, "dur": 0.453, + "args": { + "External id": 252523,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315605.753, "dur": 0.298, + "args": { + "External id": 252524,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315607.522, "dur": 0.310, + "args": { + "External id": 252525,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315608.908, "dur": 0.338, + "args": { + "External id": 252526,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918315610.610, "dur": 0.578, + "args": { + "External id": 252527,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918315624.288, "dur": 19.198, + "args": { + "External id": 252528,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 2671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918315735.726, "dur": 118.702, + "args": { + "External id": 252529,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 2672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918315762.441, "dur": 88.378, + "args": { + "External id": 252530,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2673, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918315772.132, "dur": 74.280, + "args": { + "External id": 252531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 2674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918315869.287, "dur": 1.779, + "args": { + "External id": 252532,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2675, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918315947.639, "dur": 1602.916, + "args": { + "External id": 252533,"Sequence number": 2987558, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 2676 + } + }, + { + "ph": "f", "id": 186, "pid": 4183438, "tid": 31367, "ts": 667918315947.639, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918316055.720, "dur": 99.060, + "args": { + "External id": 252534,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 2677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918316216.703, "dur": 37.729, + "args": { + "External id": 252535,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 2678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918316273.130, "dur": 48.944, + "args": { + "External id": 252536,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 2679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918316333.007, "dur": 25.628, + "args": { + "External id": 252537,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 2680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918316364.568, "dur": 36.534, + "args": { + "External id": 252538,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 2681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918316407.829, "dur": 20.964, + "args": { + "External id": 252539,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 2682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918316436.310, "dur": 29.512, + "args": { + "External id": 252540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 2683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918316495.044, "dur": 24.653, + "args": { + "External id": 252541,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 2684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918316542.579, "dur": 30.802, + "args": { + "External id": 252542,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918316592.532, "dur": 19.404, + "args": { + "External id": 252543,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 2686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918316627.346, "dur": 15.999, + "args": { + "External id": 252544,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 2687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918316689.570, "dur": 45.567, + "args": { + "External id": 252545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918316740.153, "dur": 31.963, + "args": { + "External id": 252546,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918316802.641, "dur": 169.916, + "args": { + "External id": 252547,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918316880.407, "dur": 6.119, + "args": { + "External id": 252548,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918316888.447, "dur": 3.096, + "args": { + "External id": 252549,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918317005.268, "dur": 24.662, + "args": { + "External id": 252550,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918317042.681, "dur": 13.804, + "args": { + "External id": 252551,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918317064.030, "dur": 41.839, + "args": { + "External id": 252552,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918317111.774, "dur": 34.536, + "args": { + "External id": 252553,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918317155.323, "dur": 27.565, + "args": { + "External id": 252554,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918317187.430, "dur": 50.900, + "args": { + "External id": 252555,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918317247.504, "dur": 27.853, + "args": { + "External id": 252556,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918317281.813, "dur": 45.778, + "args": { + "External id": 252557,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918317356.443, "dur": 24.226, + "args": { + "External id": 252558,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 2701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918317397.876, "dur": 24.927, + "args": { + "External id": 252559,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918317440.700, "dur": 16.470, + "args": { + "External id": 252560,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 2703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918317471.216, "dur": 18.442, + "args": { + "External id": 252561,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 2704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918317501.926, "dur": 17.324, + "args": { + "External id": 252562,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 2705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317591.526, "dur": 18.171, + "args": { + "External id": 252563,"Record function id": 0, "Ev Idx": 2706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317594.322, "dur": 14.443, + "args": { + "External id": 252564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918317598.398, "dur": 9.509, + "args": { + "External id": 252565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918317603.748, "dur": 4.067, + "args": { + "External id": 252566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317613.469, "dur": 6.417, + "args": { + "External id": 252567,"Record function id": 0, "Ev Idx": 2710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317614.813, "dur": 4.642, + "args": { + "External id": 252568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918317615.439, "dur": 3.490, + "args": { + "External id": 252569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918317616.208, "dur": 2.653, + "args": { + "External id": 252570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317623.085, "dur": 4.876, + "args": { + "External id": 252571,"Record function id": 0, "Ev Idx": 2714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317624.558, "dur": 2.876, + "args": { + "External id": 252572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918317625.271, "dur": 1.765, + "args": { + "External id": 252573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918317625.905, "dur": 0.994, + "args": { + "External id": 252574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317631.207, "dur": 3.913, + "args": { + "External id": 252575,"Record function id": 0, "Ev Idx": 2718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317632.468, "dur": 2.276, + "args": { + "External id": 252576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918317633.176, "dur": 1.180, + "args": { + "External id": 252577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918317633.592, "dur": 0.687, + "args": { + "External id": 252578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317638.185, "dur": 3.988, + "args": { + "External id": 252579,"Record function id": 0, "Ev Idx": 2722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317639.538, "dur": 2.242, + "args": { + "External id": 252580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918317640.008, "dur": 1.355, + "args": { + "External id": 252581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918317640.659, "dur": 0.632, + "args": { + "External id": 252582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317645.509, "dur": 4.373, + "args": { + "External id": 252583,"Record function id": 0, "Ev Idx": 2726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317646.973, "dur": 2.523, + "args": { + "External id": 252584,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918317647.651, "dur": 1.452, + "args": { + "External id": 252585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918317648.347, "dur": 0.685, + "args": { + "External id": 252586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317694.305, "dur": 7.084, + "args": { + "External id": 252587,"Record function id": 0, "Ev Idx": 2730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317696.196, "dur": 4.518, + "args": { + "External id": 252588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918317697.295, "dur": 2.579, + "args": { + "External id": 252589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918317698.496, "dur": 1.200, + "args": { + "External id": 252590,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317704.730, "dur": 4.039, + "args": { + "External id": 252591,"Record function id": 0, "Ev Idx": 2734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317705.880, "dur": 2.482, + "args": { + "External id": 252592,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918317706.502, "dur": 1.347, + "args": { + "External id": 252593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918317707.076, "dur": 0.697, + "args": { + "External id": 252594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317712.045, "dur": 8.868, + "args": { + "External id": 252595,"Record function id": 0, "Ev Idx": 2738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918317713.155, "dur": 4.102, + "args": { + "External id": 252596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918317713.620, "dur": 3.232, + "args": { + "External id": 252597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918317714.492, "dur": 2.290, + "args": { + "External id": 252598,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918317724.608, "dur": 14736.982, + "args": { + "External id": 252599,"Record function id": 0, "Sequence number": 2987557, "Fwd thread id": 1, "Ev Idx": 2742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918317726.034, "dur": 14726.506, + "args": { + "External id": 252600,"Sequence number": 2987557, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 2743 + } + }, + { + "ph": "f", "id": 187, "pid": 4183438, "tid": 31367, "ts": 667918317726.034, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 4183438, "tid": 31367, + "ts": 667918317754.567, "dur": 40.096, + "args": { + "External id": 252601,"Record function id": 0, "Ev Idx": 2744 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 4183438, "tid": 31367, + "ts": 667918317801.801, "dur": 63.606, + "args": { + "External id": 252602,"Record function id": 0, "Ev Idx": 2745 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 4183438, "tid": 31367, + "ts": 667918317872.198, "dur": 14572.860, + "args": { + "External id": 252603,"Record function id": 0, "Ev Idx": 2746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918317963.161, "dur": 10.016, + "args": { + "External id": 252604,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918317982.544, "dur": 4.974, + "args": { + "External id": 252605,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 2748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918318001.597, "dur": 13762.377, + "args": { + "External id": 252606,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 2749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918318016.337, "dur": 13739.505, + "args": { + "External id": 252607,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 2750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918318047.083, "dur": 14.096, + "args": { + "External id": 252608,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918318065.903, "dur": 13654.835, + "args": { + "External id": 252609,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 2752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918318068.301, "dur": 13651.815, + "args": { + "External id": 252610,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 2753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918318072.795, "dur": 4.473, + "args": { + "External id": 252611,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918318078.990, "dur": 13637.255, + "args": { + "External id": 252612,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 2755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918331850.744, "dur": 8.844, + "args": { + "External id": 252613,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 2756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918331853.623, "dur": 5.600, + "args": { + "External id": 252614,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918331890.913, "dur": 266.239, + "args": { + "External id": 252615,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 2758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918331918.418, "dur": 234.583, + "args": { + "External id": 252616,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2759, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918331930.697, "dur": 216.419, + "args": { + "External id": 252617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 2760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918332174.477, "dur": 1.760, + "args": { + "External id": 252618,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2761, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918332243.757, "dur": 6.630, + "args": { + "External id": 252619,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918332295.934, "dur": 3.265, + "args": { + "External id": 252620,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918332313.938, "dur": 1.216, + "args": { + "External id": 252621,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918332327.082, "dur": 1.031, + "args": { + "External id": 252622,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918332338.244, "dur": 0.793, + "args": { + "External id": 252623,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918332348.776, "dur": 2.326, + "args": { + "External id": 252624,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918332362.029, "dur": 0.751, + "args": { + "External id": 252625,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918332374.112, "dur": 1.541, + "args": { + "External id": 252626,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918332386.161, "dur": 0.787, + "args": { + "External id": 252627,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918332476.930, "dur": 2592.904, + "args": { + "External id": 252628,"Record function id": 0, "Ev Idx": 2771 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 4183438, "tid": 31367, + "ts": 667918332495.486, "dur": 953.306, + "args": { + "External id": 252629,"Record function id": 0, "Ev Idx": 2772 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 4183438, "tid": 31367, + "ts": 667918332509.062, "dur": 334.936, + "args": { + "External id": 252630,"Record function id": 0, "Ev Idx": 2773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918332583.358, "dur": 5.991, + "args": { + "External id": 252631,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 2774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918332592.495, "dur": 0.994, + "args": { + "External id": 252632,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 2775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918332595.191, "dur": 0.953, + "args": { + "External id": 252633,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918332598.118, "dur": 0.984, + "args": { + "External id": 252634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918332600.855, "dur": 0.743, + "args": { + "External id": 252635,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918332604.316, "dur": 0.820, + "args": { + "External id": 252636,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918332606.826, "dur": 1.605, + "args": { + "External id": 252637,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 2780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918332609.672, "dur": 0.945, + "args": { + "External id": 252638,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918332612.689, "dur": 2.558, + "args": { + "External id": 252639,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918332616.934, "dur": 0.698, + "args": { + "External id": 252640,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918332634.825, "dur": 178.784, + "args": { + "External id": 252641,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 2784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918332650.525, "dur": 157.860, + "args": { + "External id": 252642,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 2785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918332701.358, "dur": 13.192, + "args": { + "External id": 252643,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918332717.619, "dur": 63.913, + "args": { + "External id": 252644,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 2787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918332720.069, "dur": 61.104, + "args": { + "External id": 252645,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 2788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918332724.126, "dur": 6.935, + "args": { + "External id": 252646,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918332732.884, "dur": 47.465, + "args": { + "External id": 252647,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 2790 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 4183438, "tid": 31367, + "ts": 667918332920.301, "dur": 521.421, + "args": { + "External id": 252648,"Record function id": 0, "Ev Idx": 2791 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 4183438, "tid": 31367, + "ts": 667918332935.063, "dur": 495.021, + "args": { + "External id": 252649,"Record function id": 0, "Ev Idx": 2792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918332987.436, "dur": 4.730, + "args": { + "External id": 252650,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918333007.466, "dur": 27.216, + "args": { + "External id": 252651,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 2794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333011.679, "dur": 1.537, + "args": { + "External id": 252652,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333014.997, "dur": 0.534, + "args": { + "External id": 252653,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333017.019, "dur": 1.088, + "args": { + "External id": 252654,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333019.359, "dur": 2.579, + "args": { + "External id": 252655,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333023.286, "dur": 0.464, + "args": { + "External id": 252656,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333025.812, "dur": 0.359, + "args": { + "External id": 252657,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333027.267, "dur": 0.321, + "args": { + "External id": 252658,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333028.337, "dur": 0.316, + "args": { + "External id": 252659,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333030.633, "dur": 0.284, + "args": { + "External id": 252660,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918333046.398, "dur": 29.148, + "args": { + "External id": 252661,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 2804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918333104.498, "dur": 107.315, + "args": { + "External id": 252662,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 2805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918333113.937, "dur": 3.400, + "args": { + "External id": 252663,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918333122.954, "dur": 9.210, + "args": { + "External id": 252664,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 2807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918333126.991, "dur": 4.773, + "args": { + "External id": 252665,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 2808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333130.157, "dur": 0.407, + "args": { + "External id": 252666,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 2809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918333138.445, "dur": 21.724, + "args": { + "External id": 252667,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 2810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333139.715, "dur": 2.184, + "args": { + "External id": 252668,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333143.196, "dur": 0.581, + "args": { + "External id": 252669,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333145.085, "dur": 0.511, + "args": { + "External id": 252670,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333146.462, "dur": 0.560, + "args": { + "External id": 252671,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333148.098, "dur": 0.788, + "args": { + "External id": 252672,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333149.666, "dur": 0.569, + "args": { + "External id": 252673,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333151.570, "dur": 0.539, + "args": { + "External id": 252674,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333153.263, "dur": 0.346, + "args": { + "External id": 252675,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918333154.639, "dur": 2.010, + "args": { + "External id": 252676,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918333169.669, "dur": 17.580, + "args": { + "External id": 252677,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 2820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918333257.466, "dur": 110.284, + "args": { + "External id": 252678,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 2821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918333279.662, "dur": 84.542, + "args": { + "External id": 252679,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2822, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918333289.200, "dur": 70.915, + "args": { + "External id": 252680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 2823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918333380.568, "dur": 2.266, + "args": { + "External id": 252681,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2824, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918333455.547, "dur": 1593.359, + "args": { + "External id": 252682,"Sequence number": 2987556, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 2825 + } + }, + { + "ph": "f", "id": 188, "pid": 4183438, "tid": 31367, "ts": 667918333455.547, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918333564.022, "dur": 143.550, + "args": { + "External id": 252683,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 2826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918333753.556, "dur": 38.372, + "args": { + "External id": 252684,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 2827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918333811.640, "dur": 48.504, + "args": { + "External id": 252685,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 2828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918333869.883, "dur": 30.425, + "args": { + "External id": 252686,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 2829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918333906.451, "dur": 36.367, + "args": { + "External id": 252687,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 2830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918333949.557, "dur": 21.311, + "args": { + "External id": 252688,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 2831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918333980.300, "dur": 29.365, + "args": { + "External id": 252689,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 2832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918334031.930, "dur": 22.512, + "args": { + "External id": 252690,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 2833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918334075.435, "dur": 26.700, + "args": { + "External id": 252691,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918334123.321, "dur": 23.729, + "args": { + "External id": 252692,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 2835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918334159.266, "dur": 13.889, + "args": { + "External id": 252693,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 2836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918334182.746, "dur": 51.931, + "args": { + "External id": 252694,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918334239.841, "dur": 31.977, + "args": { + "External id": 252695,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918334299.200, "dur": 169.601, + "args": { + "External id": 252696,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918334377.314, "dur": 6.149, + "args": { + "External id": 252697,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918334385.371, "dur": 2.734, + "args": { + "External id": 252698,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918334497.193, "dur": 23.990, + "args": { + "External id": 252699,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918334531.800, "dur": 14.805, + "args": { + "External id": 252700,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918334554.112, "dur": 41.704, + "args": { + "External id": 252701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918334602.796, "dur": 31.879, + "args": { + "External id": 252702,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918334641.465, "dur": 67.520, + "args": { + "External id": 252703,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918334716.702, "dur": 33.099, + "args": { + "External id": 252704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918334759.842, "dur": 26.998, + "args": { + "External id": 252705,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918334795.387, "dur": 31.429, + "args": { + "External id": 252706,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918334845.535, "dur": 22.346, + "args": { + "External id": 252707,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 2850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918334884.577, "dur": 39.683, + "args": { + "External id": 252708,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918334944.430, "dur": 18.436, + "args": { + "External id": 252709,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 2852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918334977.232, "dur": 14.321, + "args": { + "External id": 252710,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 2853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918335001.705, "dur": 19.698, + "args": { + "External id": 252711,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 2854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335096.195, "dur": 16.709, + "args": { + "External id": 252712,"Record function id": 0, "Ev Idx": 2855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335099.058, "dur": 12.762, + "args": { + "External id": 252713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918335103.126, "dur": 7.744, + "args": { + "External id": 252714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918335104.622, "dur": 6.155, + "args": { + "External id": 252715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335116.712, "dur": 4.533, + "args": { + "External id": 252716,"Record function id": 0, "Ev Idx": 2859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335118.135, "dur": 2.667, + "args": { + "External id": 252717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918335118.875, "dur": 1.416, + "args": { + "External id": 252718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918335119.471, "dur": 0.753, + "args": { + "External id": 252719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335124.412, "dur": 4.805, + "args": { + "External id": 252720,"Record function id": 0, "Ev Idx": 2863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335126.007, "dur": 2.739, + "args": { + "External id": 252721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918335126.793, "dur": 1.505, + "args": { + "External id": 252722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918335127.485, "dur": 0.727, + "args": { + "External id": 252723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 2866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335135.960, "dur": 3.849, + "args": { + "External id": 252724,"Record function id": 0, "Ev Idx": 2867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335137.124, "dur": 2.266, + "args": { + "External id": 252725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918335137.694, "dur": 1.279, + "args": { + "External id": 252726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918335138.046, "dur": 0.845, + "args": { + "External id": 252727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 2870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335143.202, "dur": 3.445, + "args": { + "External id": 252728,"Record function id": 0, "Ev Idx": 2871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335144.210, "dur": 1.990, + "args": { + "External id": 252729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918335144.723, "dur": 1.001, + "args": { + "External id": 252730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918335145.113, "dur": 0.539, + "args": { + "External id": 252731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335149.933, "dur": 4.046, + "args": { + "External id": 252732,"Record function id": 0, "Ev Idx": 2875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335150.866, "dur": 2.696, + "args": { + "External id": 252733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918335151.342, "dur": 1.814, + "args": { + "External id": 252734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918335152.358, "dur": 0.725, + "args": { + "External id": 252735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335157.182, "dur": 3.534, + "args": { + "External id": 252736,"Record function id": 0, "Ev Idx": 2879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335158.295, "dur": 1.991, + "args": { + "External id": 252737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918335158.799, "dur": 1.061, + "args": { + "External id": 252738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918335159.222, "dur": 0.527, + "args": { + "External id": 252739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335163.996, "dur": 5.396, + "args": { + "External id": 252740,"Record function id": 0, "Ev Idx": 2883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335165.093, "dur": 3.875, + "args": { + "External id": 252741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918335165.577, "dur": 2.963, + "args": { + "External id": 252742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918335166.302, "dur": 2.173, + "args": { + "External id": 252743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 2886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335172.568, "dur": 3.652, + "args": { + "External id": 252744,"Record function id": 0, "Ev Idx": 2887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918335173.626, "dur": 2.182, + "args": { + "External id": 252745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918335174.184, "dur": 1.201, + "args": { + "External id": 252746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918335174.818, "dur": 0.503, + "args": { + "External id": 252747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 2890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918335179.873, "dur": 14867.706, + "args": { + "External id": 252748,"Record function id": 0, "Sequence number": 2987555, "Fwd thread id": 1, "Ev Idx": 2891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918335181.066, "dur": 14858.160, + "args": { + "External id": 252749,"Sequence number": 2987555, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 2892 + } + }, + { + "ph": "f", "id": 189, "pid": 4183438, "tid": 31367, "ts": 667918335181.066, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 4183438, "tid": 31367, + "ts": 667918335230.589, "dur": 39.582, + "args": { + "External id": 252750,"Record function id": 0, "Ev Idx": 2893 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 4183438, "tid": 31367, + "ts": 667918335277.932, "dur": 66.484, + "args": { + "External id": 252751,"Record function id": 0, "Ev Idx": 2894 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 4183438, "tid": 31367, + "ts": 667918335351.524, "dur": 14680.100, + "args": { + "External id": 252752,"Record function id": 0, "Ev Idx": 2895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918335444.276, "dur": 6.889, + "args": { + "External id": 252753,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918335465.412, "dur": 4.996, + "args": { + "External id": 252754,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 2897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918335484.094, "dur": 13848.298, + "args": { + "External id": 252755,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 2898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918335497.181, "dur": 13827.080, + "args": { + "External id": 252756,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 2899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918335521.878, "dur": 14.326, + "args": { + "External id": 252757,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918335541.338, "dur": 13748.065, + "args": { + "External id": 252758,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 2901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918335543.805, "dur": 13744.898, + "args": { + "External id": 252759,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 2902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918335547.864, "dur": 4.670, + "args": { + "External id": 252760,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918335553.988, "dur": 13730.900, + "args": { + "External id": 252761,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 2904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918349421.507, "dur": 8.811, + "args": { + "External id": 252762,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 2905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918349424.251, "dur": 5.794, + "args": { + "External id": 252763,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918349457.578, "dur": 296.804, + "args": { + "External id": 252764,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 2907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918349483.000, "dur": 266.582, + "args": { + "External id": 252765,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2908, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918349494.747, "dur": 249.691, + "args": { + "External id": 252766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 2909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918349773.611, "dur": 2.126, + "args": { + "External id": 252767,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2910, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918349831.625, "dur": 6.818, + "args": { + "External id": 252768,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918349882.923, "dur": 1.560, + "args": { + "External id": 252769,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918349899.561, "dur": 1.505, + "args": { + "External id": 252770,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918349912.570, "dur": 0.757, + "args": { + "External id": 252771,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918349925.718, "dur": 0.829, + "args": { + "External id": 252772,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918349938.763, "dur": 0.709, + "args": { + "External id": 252773,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918349949.296, "dur": 0.830, + "args": { + "External id": 252774,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918349960.072, "dur": 1.912, + "args": { + "External id": 252775,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918349970.696, "dur": 0.798, + "args": { + "External id": 252776,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918350062.572, "dur": 2623.488, + "args": { + "External id": 252777,"Record function id": 0, "Ev Idx": 2920 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 4183438, "tid": 31367, + "ts": 667918350080.858, "dur": 984.276, + "args": { + "External id": 252778,"Record function id": 0, "Ev Idx": 2921 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 4183438, "tid": 31367, + "ts": 667918350094.208, "dur": 316.101, + "args": { + "External id": 252779,"Record function id": 0, "Ev Idx": 2922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918350170.465, "dur": 4.086, + "args": { + "External id": 252780,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 2923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918350177.961, "dur": 1.216, + "args": { + "External id": 252781,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 2924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918350180.818, "dur": 1.104, + "args": { + "External id": 252782,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918350183.815, "dur": 1.095, + "args": { + "External id": 252783,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918350186.464, "dur": 0.838, + "args": { + "External id": 252784,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918350188.745, "dur": 0.666, + "args": { + "External id": 252785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 2928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918350190.964, "dur": 19.044, + "args": { + "External id": 252786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 2929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918350214.211, "dur": 2.616, + "args": { + "External id": 252787,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918350218.491, "dur": 0.790, + "args": { + "External id": 252788,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918350220.622, "dur": 0.766, + "args": { + "External id": 252789,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 2932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918350241.344, "dur": 140.246, + "args": { + "External id": 252790,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 2933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918350258.551, "dur": 118.800, + "args": { + "External id": 252791,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 2934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918350270.879, "dur": 14.596, + "args": { + "External id": 252792,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918350288.571, "dur": 62.519, + "args": { + "External id": 252793,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 2936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918350291.015, "dur": 59.741, + "args": { + "External id": 252794,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 2937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350295.309, "dur": 5.961, + "args": { + "External id": 252795,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918350303.170, "dur": 46.927, + "args": { + "External id": 252796,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 2939 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 4183438, "tid": 31367, + "ts": 667918350486.695, "dur": 570.896, + "args": { + "External id": 252797,"Record function id": 0, "Ev Idx": 2940 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 4183438, "tid": 31367, + "ts": 667918350503.509, "dur": 542.220, + "args": { + "External id": 252798,"Record function id": 0, "Ev Idx": 2941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918350557.627, "dur": 4.623, + "args": { + "External id": 252799,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918350577.774, "dur": 27.859, + "args": { + "External id": 252800,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 2943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350582.071, "dur": 1.866, + "args": { + "External id": 252801,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350585.820, "dur": 0.272, + "args": { + "External id": 252802,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350587.050, "dur": 2.016, + "args": { + "External id": 252803,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350590.901, "dur": 0.651, + "args": { + "External id": 252804,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350592.969, "dur": 0.741, + "args": { + "External id": 252805,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350595.092, "dur": 0.362, + "args": { + "External id": 252806,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350597.741, "dur": 0.812, + "args": { + "External id": 252807,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350599.422, "dur": 0.510, + "args": { + "External id": 252808,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350601.240, "dur": 0.463, + "args": { + "External id": 252809,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918350615.175, "dur": 28.632, + "args": { + "External id": 252810,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 2953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918350716.895, "dur": 106.769, + "args": { + "External id": 252811,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 2954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918350727.282, "dur": 5.004, + "args": { + "External id": 252812,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918350738.269, "dur": 12.573, + "args": { + "External id": 252813,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 2956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918350742.380, "dur": 8.009, + "args": { + "External id": 252814,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 2957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350746.258, "dur": 2.486, + "args": { + "External id": 252815,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 2958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918350757.742, "dur": 24.507, + "args": { + "External id": 252816,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 2959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350759.946, "dur": 0.555, + "args": { + "External id": 252817,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350762.069, "dur": 0.355, + "args": { + "External id": 252818,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350764.017, "dur": 0.473, + "args": { + "External id": 252819,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350766.050, "dur": 1.003, + "args": { + "External id": 252820,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350768.299, "dur": 1.021, + "args": { + "External id": 252821,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350770.664, "dur": 0.671, + "args": { + "External id": 252822,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350772.459, "dur": 0.553, + "args": { + "External id": 252823,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350774.799, "dur": 2.340, + "args": { + "External id": 252824,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918350778.416, "dur": 0.716, + "args": { + "External id": 252825,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 2968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918350796.169, "dur": 19.973, + "args": { + "External id": 252826,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 2969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918350868.737, "dur": 111.238, + "args": { + "External id": 252827,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 2970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918350892.988, "dur": 83.335, + "args": { + "External id": 252828,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2971, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918350902.542, "dur": 69.933, + "args": { + "External id": 252829,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 2972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918350996.801, "dur": 1.995, + "args": { + "External id": 252830,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2973, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918351072.090, "dur": 1558.858, + "args": { + "External id": 252831,"Sequence number": 2987554, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 2974 + } + }, + { + "ph": "f", "id": 190, "pid": 4183438, "tid": 31367, "ts": 667918351072.090, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918351178.625, "dur": 119.917, + "args": { + "External id": 252832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 2975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918351340.466, "dur": 37.934, + "args": { + "External id": 252833,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 2976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918351397.026, "dur": 40.728, + "args": { + "External id": 252834,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 2977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918351446.973, "dur": 25.365, + "args": { + "External id": 252835,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 2978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918351477.950, "dur": 31.619, + "args": { + "External id": 252836,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 2979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918351516.084, "dur": 20.480, + "args": { + "External id": 252837,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 2980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918351545.099, "dur": 29.423, + "args": { + "External id": 252838,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 2981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918351595.932, "dur": 20.850, + "args": { + "External id": 252839,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 2982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918351635.466, "dur": 69.445, + "args": { + "External id": 252840,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918351727.510, "dur": 19.393, + "args": { + "External id": 252841,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 2984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918351762.198, "dur": 13.906, + "args": { + "External id": 252842,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 2985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918351786.990, "dur": 43.738, + "args": { + "External id": 252843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918351834.268, "dur": 34.096, + "args": { + "External id": 252844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918351894.427, "dur": 172.537, + "args": { + "External id": 252845,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918351973.894, "dur": 5.817, + "args": { + "External id": 252846,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918351981.314, "dur": 5.600, + "args": { + "External id": 252847,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918352095.551, "dur": 22.516, + "args": { + "External id": 252848,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918352130.021, "dur": 14.562, + "args": { + "External id": 252849,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918352154.394, "dur": 55.711, + "args": { + "External id": 252850,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918352218.327, "dur": 35.142, + "args": { + "External id": 252851,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918352262.204, "dur": 30.939, + "args": { + "External id": 252852,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918352297.690, "dur": 29.091, + "args": { + "External id": 252853,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918352338.104, "dur": 27.514, + "args": { + "External id": 252854,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 2997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918352372.417, "dur": 27.464, + "args": { + "External id": 252855,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 2998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918352416.634, "dur": 21.312, + "args": { + "External id": 252856,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 2999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918352454.615, "dur": 22.742, + "args": { + "External id": 252857,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918352507.741, "dur": 26.937, + "args": { + "External id": 252858,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 3001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918352553.721, "dur": 14.741, + "args": { + "External id": 252859,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 3002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918352581.046, "dur": 18.358, + "args": { + "External id": 252860,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 3003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352713.461, "dur": 15.958, + "args": { + "External id": 252861,"Record function id": 0, "Ev Idx": 3004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352716.974, "dur": 11.456, + "args": { + "External id": 252862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918352721.155, "dur": 6.147, + "args": { + "External id": 252863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918352722.660, "dur": 4.549, + "args": { + "External id": 252864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352733.227, "dur": 4.511, + "args": { + "External id": 252865,"Record function id": 0, "Ev Idx": 3008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352734.542, "dur": 2.724, + "args": { + "External id": 252866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918352735.470, "dur": 1.359, + "args": { + "External id": 252867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918352735.914, "dur": 0.780, + "args": { + "External id": 252868,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352741.042, "dur": 4.019, + "args": { + "External id": 252869,"Record function id": 0, "Ev Idx": 3012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352741.903, "dur": 2.726, + "args": { + "External id": 252870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918352742.649, "dur": 1.563, + "args": { + "External id": 252871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918352743.316, "dur": 0.782, + "args": { + "External id": 252872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352751.739, "dur": 4.652, + "args": { + "External id": 252873,"Record function id": 0, "Ev Idx": 3016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352753.395, "dur": 2.585, + "args": { + "External id": 252874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918352754.232, "dur": 1.328, + "args": { + "External id": 252875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918352754.751, "dur": 0.700, + "args": { + "External id": 252876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352759.463, "dur": 4.750, + "args": { + "External id": 252877,"Record function id": 0, "Ev Idx": 3020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352760.791, "dur": 2.951, + "args": { + "External id": 252878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918352761.505, "dur": 1.777, + "args": { + "External id": 252879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918352762.366, "dur": 0.823, + "args": { + "External id": 252880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352767.248, "dur": 4.078, + "args": { + "External id": 252881,"Record function id": 0, "Ev Idx": 3024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352768.325, "dur": 2.607, + "args": { + "External id": 252882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918352769.002, "dur": 1.528, + "args": { + "External id": 252883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918352769.543, "dur": 0.895, + "args": { + "External id": 252884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352774.588, "dur": 5.510, + "args": { + "External id": 252885,"Record function id": 0, "Ev Idx": 3028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352775.498, "dur": 4.153, + "args": { + "External id": 252886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918352775.978, "dur": 3.254, + "args": { + "External id": 252887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918352776.495, "dur": 2.653, + "args": { + "External id": 252888,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352783.397, "dur": 3.612, + "args": { + "External id": 252889,"Record function id": 0, "Ev Idx": 3032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352784.251, "dur": 2.370, + "args": { + "External id": 252890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918352784.751, "dur": 1.476, + "args": { + "External id": 252891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918352785.437, "dur": 0.690, + "args": { + "External id": 252892,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352790.108, "dur": 3.431, + "args": { + "External id": 252893,"Record function id": 0, "Ev Idx": 3036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918352790.942, "dur": 2.209, + "args": { + "External id": 252894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918352791.451, "dur": 1.303, + "args": { + "External id": 252895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918352792.028, "dur": 0.622, + "args": { + "External id": 252896,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918352796.989, "dur": 15137.184, + "args": { + "External id": 252897,"Record function id": 0, "Sequence number": 2987553, "Fwd thread id": 1, "Ev Idx": 3040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918352798.270, "dur": 15126.887, + "args": { + "External id": 252898,"Sequence number": 2987553, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 3041 + } + }, + { + "ph": "f", "id": 191, "pid": 4183438, "tid": 31367, "ts": 667918352798.270, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 4183438, "tid": 31367, + "ts": 667918352832.595, "dur": 44.586, + "args": { + "External id": 252899,"Record function id": 0, "Ev Idx": 3042 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 4183438, "tid": 31367, + "ts": 667918352884.667, "dur": 68.379, + "args": { + "External id": 252900,"Record function id": 0, "Ev Idx": 3043 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 4183438, "tid": 31367, + "ts": 667918352959.561, "dur": 14957.944, + "args": { + "External id": 252901,"Record function id": 0, "Ev Idx": 3044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918353051.078, "dur": 6.571, + "args": { + "External id": 252902,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918353066.704, "dur": 4.811, + "args": { + "External id": 252903,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 3046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918353084.267, "dur": 14076.587, + "args": { + "External id": 252904,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 3047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918353108.794, "dur": 14043.778, + "args": { + "External id": 252905,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 3048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918353130.762, "dur": 14.032, + "args": { + "External id": 252906,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918353149.307, "dur": 13967.545, + "args": { + "External id": 252907,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 3050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918353151.766, "dur": 13964.418, + "args": { + "External id": 252908,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 3051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918353158.929, "dur": 4.674, + "args": { + "External id": 252909,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918353165.321, "dur": 13947.443, + "args": { + "External id": 252910,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 3053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918367262.857, "dur": 9.597, + "args": { + "External id": 252911,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 3054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918367265.655, "dur": 6.319, + "args": { + "External id": 252912,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918367300.035, "dur": 309.502, + "args": { + "External id": 252913,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 3056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918367326.301, "dur": 278.663, + "args": { + "External id": 252914,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3057, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918367337.401, "dur": 262.744, + "args": { + "External id": 252915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 3058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918367626.269, "dur": 2.015, + "args": { + "External id": 252916,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3059, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918367716.435, "dur": 8.441, + "args": { + "External id": 252917,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918367769.737, "dur": 1.525, + "args": { + "External id": 252918,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918367786.098, "dur": 1.383, + "args": { + "External id": 252919,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918367799.251, "dur": 1.046, + "args": { + "External id": 252920,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918367811.232, "dur": 2.653, + "args": { + "External id": 252921,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918367822.906, "dur": 0.977, + "args": { + "External id": 252922,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918367834.306, "dur": 0.843, + "args": { + "External id": 252923,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918367845.412, "dur": 1.634, + "args": { + "External id": 252924,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918367856.561, "dur": 2.634, + "args": { + "External id": 252925,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918367949.601, "dur": 2573.660, + "args": { + "External id": 252926,"Record function id": 0, "Ev Idx": 3069 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 4183438, "tid": 31367, + "ts": 667918367968.027, "dur": 984.902, + "args": { + "External id": 252927,"Record function id": 0, "Ev Idx": 3070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 4183438, "tid": 31367, + "ts": 667918367981.604, "dur": 328.220, + "args": { + "External id": 252928,"Record function id": 0, "Ev Idx": 3071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918368074.681, "dur": 4.028, + "args": { + "External id": 252929,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 3072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918368081.755, "dur": 0.945, + "args": { + "External id": 252930,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 3073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918368084.363, "dur": 0.940, + "args": { + "External id": 252931,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918368086.818, "dur": 0.702, + "args": { + "External id": 252932,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918368088.931, "dur": 0.588, + "args": { + "External id": 252933,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918368091.052, "dur": 0.856, + "args": { + "External id": 252934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918368093.494, "dur": 3.238, + "args": { + "External id": 252935,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 3078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918368098.239, "dur": 0.680, + "args": { + "External id": 252936,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918368100.341, "dur": 0.516, + "args": { + "External id": 252937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918368102.249, "dur": 0.497, + "args": { + "External id": 252938,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918368120.339, "dur": 158.499, + "args": { + "External id": 252939,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 3082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918368136.947, "dur": 137.381, + "args": { + "External id": 252940,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 3083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918368149.142, "dur": 12.532, + "args": { + "External id": 252941,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918368164.830, "dur": 80.592, + "args": { + "External id": 252942,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 3085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918368167.286, "dur": 77.762, + "args": { + "External id": 252943,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 3086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368171.503, "dur": 5.892, + "args": { + "External id": 252944,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918368178.971, "dur": 64.783, + "args": { + "External id": 252945,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 3088 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 4183438, "tid": 31367, + "ts": 667918368387.906, "dur": 557.017, + "args": { + "External id": 252946,"Record function id": 0, "Ev Idx": 3089 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 4183438, "tid": 31367, + "ts": 667918368403.497, "dur": 529.663, + "args": { + "External id": 252947,"Record function id": 0, "Ev Idx": 3090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918368459.127, "dur": 5.279, + "args": { + "External id": 252948,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918368479.445, "dur": 27.444, + "args": { + "External id": 252949,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 3092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368483.985, "dur": 1.583, + "args": { + "External id": 252950,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368487.557, "dur": 2.434, + "args": { + "External id": 252951,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368491.693, "dur": 0.644, + "args": { + "External id": 252952,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368493.937, "dur": 0.269, + "args": { + "External id": 252953,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368495.350, "dur": 0.347, + "args": { + "External id": 252954,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368496.713, "dur": 0.453, + "args": { + "External id": 252955,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368498.579, "dur": 0.485, + "args": { + "External id": 252956,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368500.837, "dur": 0.367, + "args": { + "External id": 252957,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368502.638, "dur": 0.341, + "args": { + "External id": 252958,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918368518.483, "dur": 27.901, + "args": { + "External id": 252959,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 3102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918368575.831, "dur": 134.291, + "args": { + "External id": 252960,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 3103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918368585.128, "dur": 4.830, + "args": { + "External id": 252961,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918368594.935, "dur": 9.451, + "args": { + "External id": 252962,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 3105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918368598.787, "dur": 5.198, + "args": { + "External id": 252963,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 3106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368602.397, "dur": 0.421, + "args": { + "External id": 252964,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 3107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918368610.806, "dur": 22.132, + "args": { + "External id": 252965,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 3108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368612.874, "dur": 0.458, + "args": { + "External id": 252966,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368614.845, "dur": 0.191, + "args": { + "External id": 252967,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368616.457, "dur": 0.764, + "args": { + "External id": 252968,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368618.716, "dur": 0.398, + "args": { + "External id": 252969,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368620.565, "dur": 0.473, + "args": { + "External id": 252970,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368622.720, "dur": 0.509, + "args": { + "External id": 252971,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368624.427, "dur": 2.187, + "args": { + "External id": 252972,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368627.616, "dur": 0.286, + "args": { + "External id": 252973,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918368629.410, "dur": 0.350, + "args": { + "External id": 252974,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918368645.052, "dur": 55.196, + "args": { + "External id": 252975,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 3118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918368755.512, "dur": 112.066, + "args": { + "External id": 252976,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 3119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918368779.810, "dur": 84.518, + "args": { + "External id": 252977,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3120, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918368789.609, "dur": 70.315, + "args": { + "External id": 252978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 3121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918368881.737, "dur": 1.729, + "args": { + "External id": 252979,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3122, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918368960.306, "dur": 1542.352, + "args": { + "External id": 252980,"Sequence number": 2987552, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 3123 + } + }, + { + "ph": "f", "id": 192, "pid": 4183438, "tid": 31367, "ts": 667918368960.306, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918369068.994, "dur": 99.508, + "args": { + "External id": 252981,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 3124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918369224.265, "dur": 37.735, + "args": { + "External id": 252982,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 3125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918369282.388, "dur": 46.601, + "args": { + "External id": 252983,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 3126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918369338.242, "dur": 25.121, + "args": { + "External id": 252984,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 3127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918369368.795, "dur": 32.808, + "args": { + "External id": 252985,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 3128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918369410.066, "dur": 19.490, + "args": { + "External id": 252986,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 3129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918369436.886, "dur": 28.509, + "args": { + "External id": 252987,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 3130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918369490.127, "dur": 19.337, + "args": { + "External id": 252988,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 3131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918369524.615, "dur": 29.813, + "args": { + "External id": 252989,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918369570.738, "dur": 17.422, + "args": { + "External id": 252990,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 3133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918369600.295, "dur": 14.170, + "args": { + "External id": 252991,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 3134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918369624.176, "dur": 70.653, + "args": { + "External id": 252992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918369700.573, "dur": 34.484, + "args": { + "External id": 252993,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918369767.669, "dur": 173.130, + "args": { + "External id": 252994,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918369847.911, "dur": 5.925, + "args": { + "External id": 252995,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918369855.549, "dur": 5.314, + "args": { + "External id": 252996,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918369971.331, "dur": 24.227, + "args": { + "External id": 252997,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918370007.665, "dur": 16.412, + "args": { + "External id": 252998,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918370032.171, "dur": 46.578, + "args": { + "External id": 252999,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918370083.980, "dur": 35.207, + "args": { + "External id": 253000,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918370124.938, "dur": 27.045, + "args": { + "External id": 253001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918370158.322, "dur": 27.757, + "args": { + "External id": 253002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918370191.320, "dur": 45.848, + "args": { + "External id": 253003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918370246.051, "dur": 28.004, + "args": { + "External id": 253004,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918370290.079, "dur": 23.238, + "args": { + "External id": 253005,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 3148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918370328.512, "dur": 37.417, + "args": { + "External id": 253006,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918370386.756, "dur": 21.145, + "args": { + "External id": 253007,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 3150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918370424.028, "dur": 15.569, + "args": { + "External id": 253008,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 3151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918370452.669, "dur": 17.897, + "args": { + "External id": 253009,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 3152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370550.359, "dur": 15.134, + "args": { + "External id": 253010,"Record function id": 0, "Ev Idx": 3153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370553.743, "dur": 10.756, + "args": { + "External id": 253011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918370558.166, "dur": 5.436, + "args": { + "External id": 253012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918370559.276, "dur": 4.172, + "args": { + "External id": 253013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370569.454, "dur": 5.183, + "args": { + "External id": 253014,"Record function id": 0, "Ev Idx": 3157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370570.926, "dur": 3.277, + "args": { + "External id": 253015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918370571.887, "dur": 1.902, + "args": { + "External id": 253016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918370572.663, "dur": 1.036, + "args": { + "External id": 253017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370577.786, "dur": 4.205, + "args": { + "External id": 253018,"Record function id": 0, "Ev Idx": 3161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370579.058, "dur": 2.500, + "args": { + "External id": 253019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918370579.547, "dur": 1.592, + "args": { + "External id": 253020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918370580.300, "dur": 0.756, + "args": { + "External id": 253021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370585.112, "dur": 9.923, + "args": { + "External id": 253022,"Record function id": 0, "Ev Idx": 3165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370586.217, "dur": 5.139, + "args": { + "External id": 253023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918370586.942, "dur": 3.987, + "args": { + "External id": 253024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918370587.571, "dur": 3.271, + "args": { + "External id": 253025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370598.222, "dur": 4.242, + "args": { + "External id": 253026,"Record function id": 0, "Ev Idx": 3169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370599.498, "dur": 2.459, + "args": { + "External id": 253027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918370600.196, "dur": 1.344, + "args": { + "External id": 253028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918370600.619, "dur": 0.845, + "args": { + "External id": 253029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370605.537, "dur": 3.750, + "args": { + "External id": 253030,"Record function id": 0, "Ev Idx": 3173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370606.743, "dur": 2.161, + "args": { + "External id": 253031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918370607.237, "dur": 1.250, + "args": { + "External id": 253032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918370607.827, "dur": 0.555, + "args": { + "External id": 253033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370612.501, "dur": 3.812, + "args": { + "External id": 253034,"Record function id": 0, "Ev Idx": 3177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370613.952, "dur": 1.925, + "args": { + "External id": 253035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918370614.492, "dur": 1.000, + "args": { + "External id": 253036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918370614.870, "dur": 0.529, + "args": { + "External id": 253037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370619.358, "dur": 3.781, + "args": { + "External id": 253038,"Record function id": 0, "Ev Idx": 3181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370620.772, "dur": 1.950, + "args": { + "External id": 253039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918370621.301, "dur": 1.026, + "args": { + "External id": 253040,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918370621.594, "dur": 0.631, + "args": { + "External id": 253041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370626.219, "dur": 3.972, + "args": { + "External id": 253042,"Record function id": 0, "Ev Idx": 3185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918370627.392, "dur": 2.405, + "args": { + "External id": 253043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918370627.918, "dur": 1.460, + "args": { + "External id": 253044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918370628.589, "dur": 0.714, + "args": { + "External id": 253045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918370633.677, "dur": 15158.232, + "args": { + "External id": 253046,"Record function id": 0, "Sequence number": 2987551, "Fwd thread id": 1, "Ev Idx": 3189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918370634.901, "dur": 15148.449, + "args": { + "External id": 253047,"Sequence number": 2987551, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 3190 + } + }, + { + "ph": "f", "id": 193, "pid": 4183438, "tid": 31367, "ts": 667918370634.901, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 4183438, "tid": 31367, + "ts": 667918370705.475, "dur": 42.407, + "args": { + "External id": 253048,"Record function id": 0, "Ev Idx": 3191 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 4183438, "tid": 31367, + "ts": 667918370756.316, "dur": 68.675, + "args": { + "External id": 253049,"Record function id": 0, "Ev Idx": 3192 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 4183438, "tid": 31367, + "ts": 667918370834.857, "dur": 14940.434, + "args": { + "External id": 253050,"Record function id": 0, "Ev Idx": 3193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918370930.165, "dur": 6.613, + "args": { + "External id": 253051,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918370946.873, "dur": 4.983, + "args": { + "External id": 253052,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 3195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918370965.075, "dur": 14104.527, + "args": { + "External id": 253053,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 3196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918370978.261, "dur": 14083.343, + "args": { + "External id": 253054,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 3197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918370997.890, "dur": 16.019, + "args": { + "External id": 253055,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918371019.157, "dur": 14006.815, + "args": { + "External id": 253056,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 3199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918371021.933, "dur": 14003.364, + "args": { + "External id": 253057,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 3200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918371026.191, "dur": 4.383, + "args": { + "External id": 253058,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918371032.394, "dur": 13989.372, + "args": { + "External id": 253059,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 3202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918385158.133, "dur": 8.932, + "args": { + "External id": 253060,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 3203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918385161.192, "dur": 5.570, + "args": { + "External id": 253061,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918385205.540, "dur": 262.397, + "args": { + "External id": 253062,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 3205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918385231.527, "dur": 232.080, + "args": { + "External id": 253063,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3206, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918385242.456, "dur": 216.558, + "args": { + "External id": 253064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 3207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918385484.617, "dur": 1.961, + "args": { + "External id": 253065,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3208, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918385540.982, "dur": 6.422, + "args": { + "External id": 253066,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918385590.697, "dur": 1.306, + "args": { + "External id": 253067,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918385606.142, "dur": 1.216, + "args": { + "External id": 253068,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918385620.050, "dur": 0.653, + "args": { + "External id": 253069,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918385631.329, "dur": 0.999, + "args": { + "External id": 253070,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918385641.703, "dur": 0.764, + "args": { + "External id": 253071,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918385687.061, "dur": 1.477, + "args": { + "External id": 253072,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918385704.610, "dur": 1.626, + "args": { + "External id": 253073,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918385715.875, "dur": 0.870, + "args": { + "External id": 253074,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918385807.693, "dur": 2570.729, + "args": { + "External id": 253075,"Record function id": 0, "Ev Idx": 3218 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 4183438, "tid": 31367, + "ts": 667918385827.115, "dur": 963.159, + "args": { + "External id": 253076,"Record function id": 0, "Ev Idx": 3219 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 4183438, "tid": 31367, + "ts": 667918385842.345, "dur": 290.462, + "args": { + "External id": 253077,"Record function id": 0, "Ev Idx": 3220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918385918.364, "dur": 4.644, + "args": { + "External id": 253078,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 3221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918385926.208, "dur": 1.032, + "args": { + "External id": 253079,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 3222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918385929.459, "dur": 0.768, + "args": { + "External id": 253080,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918385932.001, "dur": 2.819, + "args": { + "External id": 253081,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918385936.450, "dur": 0.714, + "args": { + "External id": 253082,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918385938.469, "dur": 0.848, + "args": { + "External id": 253083,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918385941.003, "dur": 1.830, + "args": { + "External id": 253084,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 3227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918385944.124, "dur": 0.853, + "args": { + "External id": 253085,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918385946.172, "dur": 0.604, + "args": { + "External id": 253086,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918385948.138, "dur": 0.592, + "args": { + "External id": 253087,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918385966.364, "dur": 140.277, + "args": { + "External id": 253088,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 3231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918385982.057, "dur": 120.458, + "args": { + "External id": 253089,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 3232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918385993.765, "dur": 13.836, + "args": { + "External id": 253090,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918386010.223, "dur": 64.446, + "args": { + "External id": 253091,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 3234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918386013.525, "dur": 60.755, + "args": { + "External id": 253092,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 3235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386017.479, "dur": 6.795, + "args": { + "External id": 253093,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918386026.200, "dur": 47.476, + "args": { + "External id": 253094,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 3237 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 4183438, "tid": 31367, + "ts": 667918386230.208, "dur": 552.004, + "args": { + "External id": 253095,"Record function id": 0, "Ev Idx": 3238 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 4183438, "tid": 31367, + "ts": 667918386248.338, "dur": 520.896, + "args": { + "External id": 253096,"Record function id": 0, "Ev Idx": 3239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918386305.696, "dur": 5.127, + "args": { + "External id": 253097,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918386326.198, "dur": 24.685, + "args": { + "External id": 253098,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 3241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386330.448, "dur": 1.758, + "args": { + "External id": 253099,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386334.028, "dur": 0.754, + "args": { + "External id": 253100,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386335.900, "dur": 0.403, + "args": { + "External id": 253101,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386337.220, "dur": 0.487, + "args": { + "External id": 253102,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386338.725, "dur": 0.389, + "args": { + "External id": 253103,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386340.049, "dur": 0.624, + "args": { + "External id": 253104,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386341.448, "dur": 2.403, + "args": { + "External id": 253105,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386344.865, "dur": 0.259, + "args": { + "External id": 253106,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386345.953, "dur": 0.661, + "args": { + "External id": 253107,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918386360.972, "dur": 31.857, + "args": { + "External id": 253108,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 3251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918386423.433, "dur": 86.766, + "args": { + "External id": 253109,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 3252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918386432.701, "dur": 3.300, + "args": { + "External id": 253110,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918386441.130, "dur": 9.344, + "args": { + "External id": 253111,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 3254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918386445.080, "dur": 5.000, + "args": { + "External id": 253112,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 3255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386448.314, "dur": 0.573, + "args": { + "External id": 253113,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 3256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918386457.100, "dur": 18.835, + "args": { + "External id": 253114,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 3257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386458.519, "dur": 0.553, + "args": { + "External id": 253115,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386460.630, "dur": 0.680, + "args": { + "External id": 253116,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386462.718, "dur": 0.474, + "args": { + "External id": 253117,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386464.197, "dur": 2.021, + "args": { + "External id": 253118,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386467.267, "dur": 0.310, + "args": { + "External id": 253119,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386468.470, "dur": 0.553, + "args": { + "External id": 253120,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386469.938, "dur": 0.409, + "args": { + "External id": 253121,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386471.228, "dur": 0.374, + "args": { + "External id": 253122,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918386472.503, "dur": 0.475, + "args": { + "External id": 253123,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918386484.957, "dur": 18.117, + "args": { + "External id": 253124,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 3267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918386550.999, "dur": 146.138, + "args": { + "External id": 253125,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 3268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918386571.905, "dur": 121.078, + "args": { + "External id": 253126,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3269, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918386580.801, "dur": 68.359, + "args": { + "External id": 253127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 3270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918386713.733, "dur": 2.581, + "args": { + "External id": 253128,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3271, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918386798.477, "dur": 1561.527, + "args": { + "External id": 253129,"Sequence number": 2987550, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 3272 + } + }, + { + "ph": "f", "id": 194, "pid": 4183438, "tid": 31367, "ts": 667918386798.477, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918386910.069, "dur": 104.069, + "args": { + "External id": 253130,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 3273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918387054.209, "dur": 36.090, + "args": { + "External id": 253131,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 3274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918387106.336, "dur": 43.202, + "args": { + "External id": 253132,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 3275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918387158.473, "dur": 27.153, + "args": { + "External id": 253133,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 3276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918387215.102, "dur": 39.200, + "args": { + "External id": 253134,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 3277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918387268.294, "dur": 22.031, + "args": { + "External id": 253135,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 3278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918387297.126, "dur": 29.636, + "args": { + "External id": 253136,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 3279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918387350.328, "dur": 23.537, + "args": { + "External id": 253137,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 3280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918387392.776, "dur": 26.758, + "args": { + "External id": 253138,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918387436.031, "dur": 17.496, + "args": { + "External id": 253139,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 3282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918387465.319, "dur": 14.196, + "args": { + "External id": 253140,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 3283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918387487.139, "dur": 37.897, + "args": { + "External id": 253141,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918387528.527, "dur": 29.670, + "args": { + "External id": 253142,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918387587.786, "dur": 213.402, + "args": { + "External id": 253143,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918387704.346, "dur": 7.339, + "args": { + "External id": 253144,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918387713.738, "dur": 2.915, + "args": { + "External id": 253145,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918387831.814, "dur": 23.513, + "args": { + "External id": 253146,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918387866.092, "dur": 13.239, + "args": { + "External id": 253147,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918387887.106, "dur": 50.844, + "args": { + "External id": 253148,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918387943.820, "dur": 31.679, + "args": { + "External id": 253149,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918387983.722, "dur": 27.617, + "args": { + "External id": 253150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918388015.809, "dur": 28.297, + "args": { + "External id": 253151,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918388050.403, "dur": 26.884, + "args": { + "External id": 253152,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918388082.637, "dur": 27.878, + "args": { + "External id": 253153,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918388125.822, "dur": 20.751, + "args": { + "External id": 253154,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 3297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918388161.852, "dur": 23.166, + "args": { + "External id": 253155,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918388232.407, "dur": 29.123, + "args": { + "External id": 253156,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 3299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918388282.474, "dur": 16.771, + "args": { + "External id": 253157,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 3300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918388311.350, "dur": 17.420, + "args": { + "External id": 253158,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 3301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388404.336, "dur": 15.195, + "args": { + "External id": 253159,"Record function id": 0, "Ev Idx": 3302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388407.553, "dur": 11.118, + "args": { + "External id": 253160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918388411.902, "dur": 5.959, + "args": { + "External id": 253161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918388413.482, "dur": 4.298, + "args": { + "External id": 253162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388423.145, "dur": 4.254, + "args": { + "External id": 253163,"Record function id": 0, "Ev Idx": 3306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388424.298, "dur": 2.690, + "args": { + "External id": 253164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918388425.058, "dur": 1.437, + "args": { + "External id": 253165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918388425.702, "dur": 0.719, + "args": { + "External id": 253166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388430.641, "dur": 5.316, + "args": { + "External id": 253167,"Record function id": 0, "Ev Idx": 3310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388431.537, "dur": 3.972, + "args": { + "External id": 253168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918388432.162, "dur": 2.830, + "args": { + "External id": 253169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918388432.697, "dur": 2.211, + "args": { + "External id": 253170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388439.084, "dur": 4.184, + "args": { + "External id": 253171,"Record function id": 0, "Ev Idx": 3314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388440.598, "dur": 2.271, + "args": { + "External id": 253172,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918388441.118, "dur": 1.330, + "args": { + "External id": 253173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918388441.587, "dur": 0.778, + "args": { + "External id": 253174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388446.291, "dur": 3.743, + "args": { + "External id": 253175,"Record function id": 0, "Ev Idx": 3318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388447.385, "dur": 2.226, + "args": { + "External id": 253176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918388447.887, "dur": 1.306, + "args": { + "External id": 253177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918388448.532, "dur": 0.588, + "args": { + "External id": 253178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388456.524, "dur": 3.694, + "args": { + "External id": 253179,"Record function id": 0, "Ev Idx": 3322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388457.647, "dur": 2.148, + "args": { + "External id": 253180,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918388458.380, "dur": 1.006, + "args": { + "External id": 253181,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918388458.749, "dur": 0.564, + "args": { + "External id": 253182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388463.391, "dur": 3.791, + "args": { + "External id": 253183,"Record function id": 0, "Ev Idx": 3326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388464.608, "dur": 2.148, + "args": { + "External id": 253184,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918388465.086, "dur": 1.247, + "args": { + "External id": 253185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918388465.638, "dur": 0.624, + "args": { + "External id": 253186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388470.194, "dur": 3.665, + "args": { + "External id": 253187,"Record function id": 0, "Ev Idx": 3330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388471.324, "dur": 2.130, + "args": { + "External id": 253188,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918388471.792, "dur": 1.236, + "args": { + "External id": 253189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918388472.372, "dur": 0.580, + "args": { + "External id": 253190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388476.825, "dur": 3.581, + "args": { + "External id": 253191,"Record function id": 0, "Ev Idx": 3334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918388477.857, "dur": 2.148, + "args": { + "External id": 253192,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918388478.390, "dur": 1.214, + "args": { + "External id": 253193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918388478.983, "dur": 0.553, + "args": { + "External id": 253194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918388483.895, "dur": 17412.765, + "args": { + "External id": 253195,"Record function id": 0, "Sequence number": 2987549, "Fwd thread id": 1, "Ev Idx": 3338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918388485.113, "dur": 17402.835, + "args": { + "External id": 253196,"Sequence number": 2987549, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 3339 + } + }, + { + "ph": "f", "id": 195, "pid": 4183438, "tid": 31367, "ts": 667918388485.113, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 4183438, "tid": 31367, + "ts": 667918388515.430, "dur": 35.573, + "args": { + "External id": 253197,"Record function id": 0, "Ev Idx": 3340 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 4183438, "tid": 31367, + "ts": 667918388558.181, "dur": 65.790, + "args": { + "External id": 253198,"Record function id": 0, "Ev Idx": 3341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 4183438, "tid": 31367, + "ts": 667918388633.177, "dur": 17247.172, + "args": { + "External id": 253199,"Record function id": 0, "Ev Idx": 3342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918388768.099, "dur": 7.145, + "args": { + "External id": 253200,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918388785.907, "dur": 6.652, + "args": { + "External id": 253201,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 3344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918388810.256, "dur": 16289.910, + "args": { + "External id": 253202,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 3345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918388823.526, "dur": 16268.239, + "args": { + "External id": 253203,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 3346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918388843.160, "dur": 14.492, + "args": { + "External id": 253204,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918388866.155, "dur": 16181.520, + "args": { + "External id": 253205,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 3348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918388868.500, "dur": 16178.393, + "args": { + "External id": 253206,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 3349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918388872.967, "dur": 4.628, + "args": { + "External id": 253207,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918388879.596, "dur": 16163.550, + "args": { + "External id": 253208,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 3351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918405187.149, "dur": 20.461, + "args": { + "External id": 253209,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 3352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918405190.073, "dur": 16.866, + "args": { + "External id": 253210,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918405237.142, "dur": 317.294, + "args": { + "External id": 253211,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 3354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918405262.832, "dur": 287.316, + "args": { + "External id": 253212,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3355, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918405274.888, "dur": 270.368, + "args": { + "External id": 253213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 3356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918405571.993, "dur": 1.902, + "args": { + "External id": 253214,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3357, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918405627.572, "dur": 6.364, + "args": { + "External id": 253215,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918405727.695, "dur": 2.795, + "args": { + "External id": 253216,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918405747.037, "dur": 3.038, + "args": { + "External id": 253217,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918405760.376, "dur": 0.771, + "args": { + "External id": 253218,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918405771.509, "dur": 0.910, + "args": { + "External id": 253219,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918405782.893, "dur": 1.114, + "args": { + "External id": 253220,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918405793.489, "dur": 2.327, + "args": { + "External id": 253221,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918405806.547, "dur": 1.734, + "args": { + "External id": 253222,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918405818.349, "dur": 0.828, + "args": { + "External id": 253223,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918405912.630, "dur": 2581.917, + "args": { + "External id": 253224,"Record function id": 0, "Ev Idx": 3367 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 4183438, "tid": 31367, + "ts": 667918405931.091, "dur": 959.139, + "args": { + "External id": 253225,"Record function id": 0, "Ev Idx": 3368 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 4183438, "tid": 31367, + "ts": 667918405944.760, "dur": 311.934, + "args": { + "External id": 253226,"Record function id": 0, "Ev Idx": 3369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918406020.899, "dur": 4.141, + "args": { + "External id": 253227,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 3370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918406028.045, "dur": 0.780, + "args": { + "External id": 253228,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 3371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918406030.759, "dur": 2.189, + "args": { + "External id": 253229,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918406034.918, "dur": 0.677, + "args": { + "External id": 253230,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918406037.317, "dur": 0.773, + "args": { + "External id": 253231,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918406039.687, "dur": 0.627, + "args": { + "External id": 253232,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918406042.255, "dur": 1.210, + "args": { + "External id": 253233,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 3376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918406045.134, "dur": 0.663, + "args": { + "External id": 253234,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918406047.297, "dur": 0.615, + "args": { + "External id": 253235,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918406049.434, "dur": 0.663, + "args": { + "External id": 253236,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918406068.127, "dur": 158.841, + "args": { + "External id": 253237,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 3380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918406083.737, "dur": 138.143, + "args": { + "External id": 253238,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 3381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918406096.255, "dur": 15.758, + "args": { + "External id": 253239,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918406114.895, "dur": 62.802, + "args": { + "External id": 253240,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 3383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918406117.546, "dur": 59.833, + "args": { + "External id": 253241,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 3384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406121.495, "dur": 5.766, + "args": { + "External id": 253242,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918406128.904, "dur": 47.870, + "args": { + "External id": 253243,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 3386 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 4183438, "tid": 31367, + "ts": 667918406332.270, "dur": 549.995, + "args": { + "External id": 253244,"Record function id": 0, "Ev Idx": 3387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 4183438, "tid": 31367, + "ts": 667918406348.557, "dur": 520.772, + "args": { + "External id": 253245,"Record function id": 0, "Ev Idx": 3388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918406403.585, "dur": 5.322, + "args": { + "External id": 253246,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918406424.640, "dur": 26.181, + "args": { + "External id": 253247,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 3390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406429.009, "dur": 1.543, + "args": { + "External id": 253248,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406432.015, "dur": 0.467, + "args": { + "External id": 253249,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406434.068, "dur": 0.679, + "args": { + "External id": 253250,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406436.500, "dur": 0.439, + "args": { + "External id": 253251,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406438.338, "dur": 0.520, + "args": { + "External id": 253252,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406440.139, "dur": 1.914, + "args": { + "External id": 253253,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406443.371, "dur": 0.537, + "args": { + "External id": 253254,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406445.427, "dur": 0.442, + "args": { + "External id": 253255,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406447.068, "dur": 0.284, + "args": { + "External id": 253256,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918406460.539, "dur": 29.786, + "args": { + "External id": 253257,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 3400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918406522.341, "dur": 89.966, + "args": { + "External id": 253258,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 3401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918406531.565, "dur": 3.180, + "args": { + "External id": 253259,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918406539.725, "dur": 9.481, + "args": { + "External id": 253260,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 3403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918406543.811, "dur": 5.001, + "args": { + "External id": 253261,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 3404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406546.997, "dur": 0.684, + "args": { + "External id": 253262,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 3405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918406556.216, "dur": 21.380, + "args": { + "External id": 253263,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 3406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406557.823, "dur": 0.922, + "args": { + "External id": 253264,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406559.950, "dur": 0.453, + "args": { + "External id": 253265,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406562.489, "dur": 1.847, + "args": { + "External id": 253266,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406566.082, "dur": 0.414, + "args": { + "External id": 253267,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406567.810, "dur": 0.326, + "args": { + "External id": 253268,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406569.498, "dur": 0.693, + "args": { + "External id": 253269,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406571.365, "dur": 0.441, + "args": { + "External id": 253270,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406573.025, "dur": 0.405, + "args": { + "External id": 253271,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918406574.558, "dur": 0.487, + "args": { + "External id": 253272,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918406587.996, "dur": 17.420, + "args": { + "External id": 253273,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 3416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918406688.442, "dur": 114.902, + "args": { + "External id": 253274,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 3417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918406712.684, "dur": 87.240, + "args": { + "External id": 253275,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3418, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918406723.059, "dur": 72.809, + "args": { + "External id": 253276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 3419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918406817.709, "dur": 1.563, + "args": { + "External id": 253277,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3420, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918406897.513, "dur": 1573.531, + "args": { + "External id": 253278,"Sequence number": 2987548, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 3421 + } + }, + { + "ph": "f", "id": 196, "pid": 4183438, "tid": 31367, "ts": 667918406897.513, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918407005.914, "dur": 102.662, + "args": { + "External id": 253279,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 3422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918407152.159, "dur": 58.243, + "args": { + "External id": 253280,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 3423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918407229.615, "dur": 48.748, + "args": { + "External id": 253281,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 3424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918407287.816, "dur": 26.926, + "args": { + "External id": 253282,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 3425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918407322.874, "dur": 33.480, + "args": { + "External id": 253283,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 3426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918407362.854, "dur": 21.188, + "args": { + "External id": 253284,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 3427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918407390.403, "dur": 29.639, + "args": { + "External id": 253285,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 3428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918407442.726, "dur": 25.170, + "args": { + "External id": 253286,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 3429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918407485.223, "dur": 29.633, + "args": { + "External id": 253287,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918407532.864, "dur": 19.406, + "args": { + "External id": 253288,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 3431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918407566.349, "dur": 15.809, + "args": { + "External id": 253289,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 3432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918407589.744, "dur": 36.114, + "args": { + "External id": 253290,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918407629.350, "dur": 70.183, + "args": { + "External id": 253291,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918407732.185, "dur": 175.446, + "args": { + "External id": 253292,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918407812.352, "dur": 8.407, + "args": { + "External id": 253293,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918407822.529, "dur": 2.825, + "args": { + "External id": 253294,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918407936.697, "dur": 27.023, + "args": { + "External id": 253295,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918407975.032, "dur": 13.384, + "args": { + "External id": 253296,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918407996.162, "dur": 52.499, + "args": { + "External id": 253297,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918408054.705, "dur": 34.083, + "args": { + "External id": 253298,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918408094.775, "dur": 27.619, + "args": { + "External id": 253299,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918408127.945, "dur": 27.976, + "args": { + "External id": 253300,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918408164.648, "dur": 26.825, + "args": { + "External id": 253301,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918408216.821, "dur": 35.985, + "args": { + "External id": 253302,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918408271.047, "dur": 21.405, + "args": { + "External id": 253303,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 3446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918408322.720, "dur": 28.959, + "args": { + "External id": 253304,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918408367.253, "dur": 16.298, + "args": { + "External id": 253305,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 3448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918408397.952, "dur": 12.966, + "args": { + "External id": 253306,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 3449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918408424.259, "dur": 18.518, + "args": { + "External id": 253307,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 3450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408517.133, "dur": 14.860, + "args": { + "External id": 253308,"Record function id": 0, "Ev Idx": 3451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408520.233, "dur": 10.821, + "args": { + "External id": 253309,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918408524.708, "dur": 5.589, + "args": { + "External id": 253310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918408526.091, "dur": 4.116, + "args": { + "External id": 253311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408535.692, "dur": 4.932, + "args": { + "External id": 253312,"Record function id": 0, "Ev Idx": 3455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408537.146, "dur": 3.027, + "args": { + "External id": 253313,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918408538.001, "dur": 1.667, + "args": { + "External id": 253314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918408538.635, "dur": 0.933, + "args": { + "External id": 253315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408543.817, "dur": 7.196, + "args": { + "External id": 253316,"Record function id": 0, "Ev Idx": 3459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408545.138, "dur": 5.481, + "args": { + "External id": 253317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918408545.921, "dur": 1.404, + "args": { + "External id": 253318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918408546.552, "dur": 0.660, + "args": { + "External id": 253319,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408554.145, "dur": 3.798, + "args": { + "External id": 253320,"Record function id": 0, "Ev Idx": 3463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408555.401, "dur": 2.091, + "args": { + "External id": 253321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918408555.925, "dur": 1.138, + "args": { + "External id": 253322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918408556.210, "dur": 0.747, + "args": { + "External id": 253323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408561.004, "dur": 5.410, + "args": { + "External id": 253324,"Record function id": 0, "Ev Idx": 3467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408562.003, "dur": 3.975, + "args": { + "External id": 253325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918408562.507, "dur": 3.027, + "args": { + "External id": 253326,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918408562.853, "dur": 2.568, + "args": { + "External id": 253327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408569.428, "dur": 3.998, + "args": { + "External id": 253328,"Record function id": 0, "Ev Idx": 3471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408570.492, "dur": 2.542, + "args": { + "External id": 253329,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918408570.955, "dur": 1.686, + "args": { + "External id": 253330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918408571.832, "dur": 0.739, + "args": { + "External id": 253331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408576.675, "dur": 3.954, + "args": { + "External id": 253332,"Record function id": 0, "Ev Idx": 3475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408577.829, "dur": 2.415, + "args": { + "External id": 253333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918408578.361, "dur": 1.501, + "args": { + "External id": 253334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918408579.199, "dur": 0.558, + "args": { + "External id": 253335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408583.597, "dur": 3.800, + "args": { + "External id": 253336,"Record function id": 0, "Ev Idx": 3479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408584.854, "dur": 2.147, + "args": { + "External id": 253337,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918408585.379, "dur": 1.250, + "args": { + "External id": 253338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918408585.871, "dur": 0.658, + "args": { + "External id": 253339,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408590.417, "dur": 3.867, + "args": { + "External id": 253340,"Record function id": 0, "Ev Idx": 3483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918408591.512, "dur": 2.387, + "args": { + "External id": 253341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918408592.096, "dur": 1.387, + "args": { + "External id": 253342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918408592.841, "dur": 0.567, + "args": { + "External id": 253343,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918408597.781, "dur": 15260.479, + "args": { + "External id": 253344,"Record function id": 0, "Sequence number": 2987547, "Fwd thread id": 1, "Ev Idx": 3487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918408599.047, "dur": 15250.364, + "args": { + "External id": 253345,"Sequence number": 2987547, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 3488 + } + }, + { + "ph": "f", "id": 197, "pid": 4183438, "tid": 31367, "ts": 667918408599.047, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 4183438, "tid": 31367, + "ts": 667918408628.048, "dur": 77.230, + "args": { + "External id": 253346,"Record function id": 0, "Ev Idx": 3489 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 4183438, "tid": 31367, + "ts": 667918408714.383, "dur": 72.798, + "args": { + "External id": 253347,"Record function id": 0, "Ev Idx": 3490 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 4183438, "tid": 31367, + "ts": 667918408794.222, "dur": 15047.557, + "args": { + "External id": 253348,"Record function id": 0, "Ev Idx": 3491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918408886.894, "dur": 10.053, + "args": { + "External id": 253349,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918408906.846, "dur": 5.237, + "args": { + "External id": 253350,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 3493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918408925.740, "dur": 14195.468, + "args": { + "External id": 253351,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 3494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918408939.260, "dur": 14173.363, + "args": { + "External id": 253352,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 3495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918408964.625, "dur": 14.228, + "args": { + "External id": 253353,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918408983.682, "dur": 14093.753, + "args": { + "External id": 253354,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 3497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918408986.015, "dur": 14090.597, + "args": { + "External id": 253355,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 3498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918408990.305, "dur": 6.056, + "args": { + "External id": 253356,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918408998.181, "dur": 14075.143, + "args": { + "External id": 253357,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 3500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918423226.883, "dur": 9.688, + "args": { + "External id": 253358,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 3501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918423229.684, "dur": 6.423, + "args": { + "External id": 253359,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918423264.528, "dur": 263.818, + "args": { + "External id": 253360,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 3503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918423290.644, "dur": 233.584, + "args": { + "External id": 253361,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3504, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918423302.287, "dur": 217.507, + "args": { + "External id": 253362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 3505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918423544.152, "dur": 1.980, + "args": { + "External id": 253363,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3506, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918423597.881, "dur": 6.401, + "args": { + "External id": 253364,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918423645.870, "dur": 1.200, + "args": { + "External id": 253365,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918423701.182, "dur": 1.821, + "args": { + "External id": 253366,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918423717.142, "dur": 2.247, + "args": { + "External id": 253367,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918423731.301, "dur": 0.667, + "args": { + "External id": 253368,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918423742.705, "dur": 0.731, + "args": { + "External id": 253369,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918423754.686, "dur": 0.738, + "args": { + "External id": 253370,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918423766.769, "dur": 3.136, + "args": { + "External id": 253371,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918423779.906, "dur": 0.769, + "args": { + "External id": 253372,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918423873.535, "dur": 2618.911, + "args": { + "External id": 253373,"Record function id": 0, "Ev Idx": 3516 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 4183438, "tid": 31367, + "ts": 667918423892.352, "dur": 988.513, + "args": { + "External id": 253374,"Record function id": 0, "Ev Idx": 3517 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 4183438, "tid": 31367, + "ts": 667918423906.270, "dur": 309.246, + "args": { + "External id": 253375,"Record function id": 0, "Ev Idx": 3518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918423983.667, "dur": 4.232, + "args": { + "External id": 253376,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 3519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918423990.990, "dur": 0.818, + "args": { + "External id": 253377,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 3520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918423993.939, "dur": 0.732, + "args": { + "External id": 253378,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918423996.436, "dur": 0.700, + "args": { + "External id": 253379,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918423998.801, "dur": 2.519, + "args": { + "External id": 253380,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918424002.825, "dur": 0.819, + "args": { + "External id": 253381,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918424005.345, "dur": 1.175, + "args": { + "External id": 253382,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 3525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918424008.261, "dur": 0.634, + "args": { + "External id": 253383,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918424010.618, "dur": 0.783, + "args": { + "External id": 253384,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918424013.017, "dur": 0.570, + "args": { + "External id": 253385,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918424031.004, "dur": 138.488, + "args": { + "External id": 253386,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 3529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918424046.476, "dur": 118.717, + "args": { + "External id": 253387,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 3530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918424059.545, "dur": 13.321, + "args": { + "External id": 253388,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918424075.653, "dur": 63.078, + "args": { + "External id": 253389,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 3532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918424078.278, "dur": 60.056, + "args": { + "External id": 253390,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 3533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424081.980, "dur": 5.556, + "args": { + "External id": 253391,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918424089.339, "dur": 48.444, + "args": { + "External id": 253392,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 3535 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 4183438, "tid": 31367, + "ts": 667918424298.655, "dur": 574.484, + "args": { + "External id": 253393,"Record function id": 0, "Ev Idx": 3536 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 4183438, "tid": 31367, + "ts": 667918424315.513, "dur": 544.577, + "args": { + "External id": 253394,"Record function id": 0, "Ev Idx": 3537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918424373.688, "dur": 7.113, + "args": { + "External id": 253395,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918424396.970, "dur": 25.172, + "args": { + "External id": 253396,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 3539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424401.313, "dur": 1.518, + "args": { + "External id": 253397,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424404.537, "dur": 0.404, + "args": { + "External id": 253398,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424406.415, "dur": 0.357, + "args": { + "External id": 253399,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424408.362, "dur": 0.428, + "args": { + "External id": 253400,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424410.018, "dur": 0.621, + "args": { + "External id": 253401,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424412.079, "dur": 0.483, + "args": { + "External id": 253402,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424413.646, "dur": 0.269, + "args": { + "External id": 253403,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424415.379, "dur": 2.280, + "args": { + "External id": 253404,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424418.820, "dur": 0.323, + "args": { + "External id": 253405,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918424432.400, "dur": 31.487, + "args": { + "External id": 253406,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 3549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918424494.251, "dur": 93.589, + "args": { + "External id": 253407,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 3550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918424503.719, "dur": 3.088, + "args": { + "External id": 253408,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918424512.498, "dur": 9.242, + "args": { + "External id": 253409,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 3552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918424516.411, "dur": 4.951, + "args": { + "External id": 253410,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 3553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424519.318, "dur": 0.860, + "args": { + "External id": 253411,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 3554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918424529.738, "dur": 22.870, + "args": { + "External id": 253412,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 3555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424531.294, "dur": 0.406, + "args": { + "External id": 253413,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424533.255, "dur": 0.468, + "args": { + "External id": 253414,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424535.076, "dur": 0.762, + "args": { + "External id": 253415,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424537.159, "dur": 0.323, + "args": { + "External id": 253416,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424539.161, "dur": 2.148, + "args": { + "External id": 253417,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424542.352, "dur": 0.315, + "args": { + "External id": 253418,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424543.889, "dur": 0.210, + "args": { + "External id": 253419,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424545.115, "dur": 0.580, + "args": { + "External id": 253420,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918424547.101, "dur": 0.413, + "args": { + "External id": 253421,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918424562.374, "dur": 18.322, + "args": { + "External id": 253422,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 3565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918424631.037, "dur": 152.376, + "args": { + "External id": 253423,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 3566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918424692.065, "dur": 87.274, + "args": { + "External id": 253424,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3567, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918424703.184, "dur": 72.234, + "args": { + "External id": 253425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 3568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918424802.198, "dur": 1.672, + "args": { + "External id": 253426,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3569, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918424887.991, "dur": 1583.877, + "args": { + "External id": 253427,"Sequence number": 2987546, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 3570 + } + }, + { + "ph": "f", "id": 198, "pid": 4183438, "tid": 31367, "ts": 667918424887.991, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918424994.583, "dur": 101.643, + "args": { + "External id": 253428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 3571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918425135.537, "dur": 34.942, + "args": { + "External id": 253429,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 3572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918425186.257, "dur": 65.442, + "args": { + "External id": 253430,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 3573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918425263.829, "dur": 29.174, + "args": { + "External id": 253431,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 3574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918425298.622, "dur": 33.152, + "args": { + "External id": 253432,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 3575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918425339.690, "dur": 20.474, + "args": { + "External id": 253433,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 3576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918425366.797, "dur": 31.125, + "args": { + "External id": 253434,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 3577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918425420.738, "dur": 26.493, + "args": { + "External id": 253435,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 3578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918425464.543, "dur": 27.073, + "args": { + "External id": 253436,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918425509.138, "dur": 18.823, + "args": { + "External id": 253437,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 3580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918425540.355, "dur": 13.041, + "args": { + "External id": 253438,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 3581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918425562.335, "dur": 35.297, + "args": { + "External id": 253439,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918425601.276, "dur": 30.484, + "args": { + "External id": 253440,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918425711.000, "dur": 172.712, + "args": { + "External id": 253441,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918425790.631, "dur": 6.511, + "args": { + "External id": 253442,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918425799.150, "dur": 3.594, + "args": { + "External id": 253443,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918425918.045, "dur": 22.727, + "args": { + "External id": 253444,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918425951.566, "dur": 16.526, + "args": { + "External id": 253445,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918425977.141, "dur": 51.769, + "args": { + "External id": 253446,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918426036.880, "dur": 31.595, + "args": { + "External id": 253447,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918426077.592, "dur": 29.177, + "args": { + "External id": 253448,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918426111.653, "dur": 28.007, + "args": { + "External id": 253449,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918426145.120, "dur": 26.497, + "args": { + "External id": 253450,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918426179.332, "dur": 48.220, + "args": { + "External id": 253451,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918426247.444, "dur": 37.848, + "args": { + "External id": 253452,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 3595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918426315.094, "dur": 28.963, + "args": { + "External id": 253453,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918426360.833, "dur": 17.504, + "args": { + "External id": 253454,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 3597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918426393.715, "dur": 18.492, + "args": { + "External id": 253455,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 3598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918426424.064, "dur": 15.824, + "args": { + "External id": 253456,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 3599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426514.951, "dur": 18.618, + "args": { + "External id": 253457,"Record function id": 0, "Ev Idx": 3600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426518.073, "dur": 14.660, + "args": { + "External id": 253458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918426522.641, "dur": 9.135, + "args": { + "External id": 253459,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918426523.984, "dur": 7.658, + "args": { + "External id": 253460,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426537.401, "dur": 4.239, + "args": { + "External id": 253461,"Record function id": 0, "Ev Idx": 3604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426538.542, "dur": 2.648, + "args": { + "External id": 253462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918426539.132, "dur": 1.556, + "args": { + "External id": 253463,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918426539.478, "dur": 1.110, + "args": { + "External id": 253464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426544.832, "dur": 5.233, + "args": { + "External id": 253465,"Record function id": 0, "Ev Idx": 3608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426545.935, "dur": 3.679, + "args": { + "External id": 253466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918426546.480, "dur": 2.700, + "args": { + "External id": 253467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918426546.799, "dur": 2.297, + "args": { + "External id": 253468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426553.205, "dur": 4.165, + "args": { + "External id": 253469,"Record function id": 0, "Ev Idx": 3612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426554.426, "dur": 2.543, + "args": { + "External id": 253470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918426554.941, "dur": 1.610, + "args": { + "External id": 253471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918426555.420, "dur": 1.029, + "args": { + "External id": 253472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426560.506, "dur": 3.485, + "args": { + "External id": 253473,"Record function id": 0, "Ev Idx": 3616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426561.470, "dur": 2.090, + "args": { + "External id": 253474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918426561.976, "dur": 1.036, + "args": { + "External id": 253475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918426562.343, "dur": 0.564, + "args": { + "External id": 253476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426567.103, "dur": 4.246, + "args": { + "External id": 253477,"Record function id": 0, "Ev Idx": 3620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426568.306, "dur": 2.595, + "args": { + "External id": 253478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918426568.936, "dur": 1.482, + "args": { + "External id": 253479,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918426569.675, "dur": 0.676, + "args": { + "External id": 253480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426574.459, "dur": 3.708, + "args": { + "External id": 253481,"Record function id": 0, "Ev Idx": 3624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426575.525, "dur": 2.247, + "args": { + "External id": 253482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918426576.093, "dur": 1.239, + "args": { + "External id": 253483,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918426576.689, "dur": 0.571, + "args": { + "External id": 253484,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426581.112, "dur": 7.142, + "args": { + "External id": 253485,"Record function id": 0, "Ev Idx": 3628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426582.211, "dur": 5.616, + "args": { + "External id": 253486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918426582.766, "dur": 4.655, + "args": { + "External id": 253487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918426583.424, "dur": 3.922, + "args": { + "External id": 253488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426591.820, "dur": 3.367, + "args": { + "External id": 253489,"Record function id": 0, "Ev Idx": 3632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918426592.747, "dur": 2.056, + "args": { + "External id": 253490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918426593.262, "dur": 1.116, + "args": { + "External id": 253491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918426593.666, "dur": 0.639, + "args": { + "External id": 253492,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918426599.366, "dur": 14839.623, + "args": { + "External id": 253493,"Record function id": 0, "Sequence number": 2987545, "Fwd thread id": 1, "Ev Idx": 3636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918426600.532, "dur": 14830.071, + "args": { + "External id": 253494,"Sequence number": 2987545, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 3637 + } + }, + { + "ph": "f", "id": 199, "pid": 4183438, "tid": 31367, "ts": 667918426600.532, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 4183438, "tid": 31367, + "ts": 667918426628.968, "dur": 80.192, + "args": { + "External id": 253495,"Record function id": 0, "Ev Idx": 3638 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 4183438, "tid": 31367, + "ts": 667918426718.546, "dur": 65.523, + "args": { + "External id": 253496,"Record function id": 0, "Ev Idx": 3639 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 4183438, "tid": 31367, + "ts": 667918426790.903, "dur": 14631.690, + "args": { + "External id": 253497,"Record function id": 0, "Ev Idx": 3640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918426883.398, "dur": 6.750, + "args": { + "External id": 253498,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918426903.492, "dur": 6.698, + "args": { + "External id": 253499,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 3642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918426923.769, "dur": 13835.715, + "args": { + "External id": 253500,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 3643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918426937.222, "dur": 13813.821, + "args": { + "External id": 253501,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 3644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918426960.169, "dur": 13.670, + "args": { + "External id": 253502,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918426979.119, "dur": 13737.590, + "args": { + "External id": 253503,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 3646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918426981.719, "dur": 13734.303, + "args": { + "External id": 253504,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 3647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918426985.929, "dur": 4.796, + "args": { + "External id": 253505,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918426992.505, "dur": 13720.242, + "args": { + "External id": 253506,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 3649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918440846.274, "dur": 8.949, + "args": { + "External id": 253507,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 3650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918440849.138, "dur": 5.771, + "args": { + "External id": 253508,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918440882.415, "dur": 256.319, + "args": { + "External id": 253509,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 3652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918440908.660, "dur": 225.933, + "args": { + "External id": 253510,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3653, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918440919.483, "dur": 210.532, + "args": { + "External id": 253511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 3654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918441154.403, "dur": 1.760, + "args": { + "External id": 253512,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3655, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918441223.211, "dur": 6.735, + "args": { + "External id": 253513,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918441275.235, "dur": 1.432, + "args": { + "External id": 253514,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918441291.374, "dur": 1.088, + "args": { + "External id": 253515,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918441305.324, "dur": 2.472, + "args": { + "External id": 253516,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918441317.469, "dur": 0.847, + "args": { + "External id": 253517,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918441328.708, "dur": 0.700, + "args": { + "External id": 253518,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918441339.679, "dur": 0.991, + "args": { + "External id": 253519,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918441350.897, "dur": 3.031, + "args": { + "External id": 253520,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918441363.640, "dur": 0.865, + "args": { + "External id": 253521,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918441454.665, "dur": 2671.890, + "args": { + "External id": 253522,"Record function id": 0, "Ev Idx": 3665 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 4183438, "tid": 31367, + "ts": 667918441473.764, "dur": 989.917, + "args": { + "External id": 253523,"Record function id": 0, "Ev Idx": 3666 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 4183438, "tid": 31367, + "ts": 667918441487.868, "dur": 331.220, + "args": { + "External id": 253524,"Record function id": 0, "Ev Idx": 3667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918441562.603, "dur": 4.134, + "args": { + "External id": 253525,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 3668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918441569.827, "dur": 0.865, + "args": { + "External id": 253526,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 3669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918441572.657, "dur": 1.451, + "args": { + "External id": 253527,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918441575.800, "dur": 1.050, + "args": { + "External id": 253528,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918441578.698, "dur": 2.630, + "args": { + "External id": 253529,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918441583.142, "dur": 1.004, + "args": { + "External id": 253530,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918441586.055, "dur": 0.923, + "args": { + "External id": 253531,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 3674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918441588.905, "dur": 1.080, + "args": { + "External id": 253532,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918441591.653, "dur": 0.838, + "args": { + "External id": 253533,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918441594.255, "dur": 1.178, + "args": { + "External id": 253534,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918441613.123, "dur": 173.585, + "args": { + "External id": 253535,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 3678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918441628.223, "dur": 154.020, + "args": { + "External id": 253536,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 3679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918441640.224, "dur": 47.930, + "args": { + "External id": 253537,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918441692.116, "dur": 63.787, + "args": { + "External id": 253538,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 3681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918441694.528, "dur": 61.082, + "args": { + "External id": 253539,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 3682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918441698.574, "dur": 7.134, + "args": { + "External id": 253540,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918441707.282, "dur": 47.695, + "args": { + "External id": 253541,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 3684 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 4183438, "tid": 31367, + "ts": 667918441901.116, "dur": 554.321, + "args": { + "External id": 253542,"Record function id": 0, "Ev Idx": 3685 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 4183438, "tid": 31367, + "ts": 667918441918.978, "dur": 520.387, + "args": { + "External id": 253543,"Record function id": 0, "Ev Idx": 3686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918441977.801, "dur": 6.333, + "args": { + "External id": 253544,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918441999.702, "dur": 26.884, + "args": { + "External id": 253545,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 3688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442004.172, "dur": 1.434, + "args": { + "External id": 253546,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442007.732, "dur": 0.644, + "args": { + "External id": 253547,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442009.492, "dur": 0.708, + "args": { + "External id": 253548,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442011.817, "dur": 0.863, + "args": { + "External id": 253549,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442013.659, "dur": 0.463, + "args": { + "External id": 253550,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442015.268, "dur": 0.270, + "args": { + "External id": 253551,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442016.593, "dur": 0.291, + "args": { + "External id": 253552,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442018.091, "dur": 2.146, + "args": { + "External id": 253553,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442021.046, "dur": 0.473, + "args": { + "External id": 253554,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918442037.178, "dur": 31.254, + "args": { + "External id": 253555,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 3698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918442098.560, "dur": 114.080, + "args": { + "External id": 253556,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 3699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918442107.937, "dur": 3.044, + "args": { + "External id": 253557,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918442116.189, "dur": 10.969, + "args": { + "External id": 253558,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 3701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918442120.534, "dur": 6.189, + "args": { + "External id": 253559,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 3702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442124.243, "dur": 0.998, + "args": { + "External id": 253560,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 3703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918442133.217, "dur": 24.693, + "args": { + "External id": 253561,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 3704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442134.877, "dur": 0.405, + "args": { + "External id": 253562,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442136.954, "dur": 0.470, + "args": { + "External id": 253563,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442138.878, "dur": 0.414, + "args": { + "External id": 253564,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442140.752, "dur": 0.372, + "args": { + "External id": 253565,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442142.358, "dur": 2.463, + "args": { + "External id": 253566,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442146.445, "dur": 0.498, + "args": { + "External id": 253567,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442148.144, "dur": 0.634, + "args": { + "External id": 253568,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442150.503, "dur": 0.533, + "args": { + "External id": 253569,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918442152.293, "dur": 0.700, + "args": { + "External id": 253570,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918442167.265, "dur": 18.189, + "args": { + "External id": 253571,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 3714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918442258.383, "dur": 114.342, + "args": { + "External id": 253572,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 3715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918442282.799, "dur": 86.257, + "args": { + "External id": 253573,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3716, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918442292.545, "dur": 72.321, + "args": { + "External id": 253574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 3717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918442387.432, "dur": 1.740, + "args": { + "External id": 253575,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3718, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918442471.337, "dur": 1635.075, + "args": { + "External id": 253576,"Sequence number": 2987544, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 3719 + } + }, + { + "ph": "f", "id": 200, "pid": 4183438, "tid": 31367, "ts": 667918442471.337, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918442581.735, "dur": 145.678, + "args": { + "External id": 253577,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 3720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918442774.773, "dur": 38.335, + "args": { + "External id": 253578,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 3721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918442829.441, "dur": 48.641, + "args": { + "External id": 253579,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 3722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918442887.714, "dur": 29.117, + "args": { + "External id": 253580,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 3723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918442923.284, "dur": 35.261, + "args": { + "External id": 253581,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 3724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918442967.038, "dur": 20.087, + "args": { + "External id": 253582,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 3725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918442994.754, "dur": 32.338, + "args": { + "External id": 253583,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 3726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918443048.911, "dur": 23.189, + "args": { + "External id": 253584,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 3727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918443091.382, "dur": 34.714, + "args": { + "External id": 253585,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918443144.136, "dur": 18.921, + "args": { + "External id": 253586,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 3729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918443174.242, "dur": 14.852, + "args": { + "External id": 253587,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 3730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918443217.947, "dur": 41.761, + "args": { + "External id": 253588,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918443263.443, "dur": 31.229, + "args": { + "External id": 253589,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918443322.043, "dur": 176.622, + "args": { + "External id": 253590,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918443403.501, "dur": 6.493, + "args": { + "External id": 253591,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918443412.006, "dur": 2.998, + "args": { + "External id": 253592,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918443527.872, "dur": 28.400, + "args": { + "External id": 253593,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918443568.718, "dur": 16.442, + "args": { + "External id": 253594,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918443592.340, "dur": 45.554, + "args": { + "External id": 253595,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918443645.281, "dur": 80.016, + "args": { + "External id": 253596,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918443734.912, "dur": 29.457, + "args": { + "External id": 253597,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918443769.096, "dur": 27.528, + "args": { + "External id": 253598,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918443802.249, "dur": 26.907, + "args": { + "External id": 253599,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918443837.104, "dur": 47.209, + "args": { + "External id": 253600,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918443908.946, "dur": 25.366, + "args": { + "External id": 253601,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 3744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918443952.473, "dur": 28.942, + "args": { + "External id": 253602,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918443994.872, "dur": 16.855, + "args": { + "External id": 253603,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 3746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918444029.973, "dur": 15.924, + "args": { + "External id": 253604,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 3747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918444057.929, "dur": 17.277, + "args": { + "External id": 253605,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 3748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444151.921, "dur": 14.534, + "args": { + "External id": 253606,"Record function id": 0, "Ev Idx": 3749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444154.767, "dur": 10.789, + "args": { + "External id": 253607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918444159.142, "dur": 5.600, + "args": { + "External id": 253608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918444160.394, "dur": 4.265, + "args": { + "External id": 253609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444170.353, "dur": 4.370, + "args": { + "External id": 253610,"Record function id": 0, "Ev Idx": 3753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444171.896, "dur": 2.409, + "args": { + "External id": 253611,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918444172.574, "dur": 1.265, + "args": { + "External id": 253612,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918444172.892, "dur": 0.851, + "args": { + "External id": 253613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444177.998, "dur": 3.785, + "args": { + "External id": 253614,"Record function id": 0, "Ev Idx": 3757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444179.242, "dur": 2.113, + "args": { + "External id": 253615,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918444179.736, "dur": 1.168, + "args": { + "External id": 253616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918444180.118, "dur": 0.714, + "args": { + "External id": 253617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444184.971, "dur": 5.511, + "args": { + "External id": 253618,"Record function id": 0, "Ev Idx": 3761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444186.281, "dur": 3.806, + "args": { + "External id": 253619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918444186.795, "dur": 2.867, + "args": { + "External id": 253620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918444187.097, "dur": 2.478, + "args": { + "External id": 253621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444216.886, "dur": 6.352, + "args": { + "External id": 253622,"Record function id": 0, "Ev Idx": 3765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444218.759, "dur": 3.778, + "args": { + "External id": 253623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918444219.703, "dur": 2.066, + "args": { + "External id": 253624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918444220.292, "dur": 1.271, + "args": { + "External id": 253625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444226.595, "dur": 4.247, + "args": { + "External id": 253626,"Record function id": 0, "Ev Idx": 3769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444227.716, "dur": 2.711, + "args": { + "External id": 253627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918444228.370, "dur": 1.619, + "args": { + "External id": 253628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918444229.328, "dur": 0.586, + "args": { + "External id": 253629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444234.129, "dur": 3.449, + "args": { + "External id": 253630,"Record function id": 0, "Ev Idx": 3773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444235.146, "dur": 2.022, + "args": { + "External id": 253631,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918444235.663, "dur": 1.115, + "args": { + "External id": 253632,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918444236.147, "dur": 0.558, + "args": { + "External id": 253633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444240.575, "dur": 3.881, + "args": { + "External id": 253634,"Record function id": 0, "Ev Idx": 3777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444241.817, "dur": 2.228, + "args": { + "External id": 253635,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918444242.382, "dur": 1.238, + "args": { + "External id": 253636,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918444242.980, "dur": 0.569, + "args": { + "External id": 253637,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444247.978, "dur": 3.967, + "args": { + "External id": 253638,"Record function id": 0, "Ev Idx": 3781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918444249.038, "dur": 2.500, + "args": { + "External id": 253639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918444249.529, "dur": 1.622, + "args": { + "External id": 253640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918444250.436, "dur": 0.640, + "args": { + "External id": 253641,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918444259.380, "dur": 14792.307, + "args": { + "External id": 253642,"Record function id": 0, "Sequence number": 2987543, "Fwd thread id": 1, "Ev Idx": 3785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918444260.643, "dur": 14782.451, + "args": { + "External id": 253643,"Sequence number": 2987543, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 3786 + } + }, + { + "ph": "f", "id": 201, "pid": 4183438, "tid": 31367, "ts": 667918444260.643, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 4183438, "tid": 31367, + "ts": 667918444289.644, "dur": 38.940, + "args": { + "External id": 253644,"Record function id": 0, "Ev Idx": 3787 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 4183438, "tid": 31367, + "ts": 667918444335.738, "dur": 64.763, + "args": { + "External id": 253645,"Record function id": 0, "Ev Idx": 3788 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 4183438, "tid": 31367, + "ts": 667918444406.942, "dur": 14628.492, + "args": { + "External id": 253646,"Record function id": 0, "Ev Idx": 3789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918444498.342, "dur": 10.525, + "args": { + "External id": 253647,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918444517.906, "dur": 4.639, + "args": { + "External id": 253648,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 3791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918444536.237, "dur": 13762.381, + "args": { + "External id": 253649,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 3792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918444549.300, "dur": 13741.274, + "args": { + "External id": 253650,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 3793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918444571.024, "dur": 18.495, + "args": { + "External id": 253651,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918444594.662, "dur": 13660.718, + "args": { + "External id": 253652,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 3795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918444597.461, "dur": 13657.213, + "args": { + "External id": 253653,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 3796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918444601.436, "dur": 5.101, + "args": { + "External id": 253654,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918444608.393, "dur": 13642.361, + "args": { + "External id": 253655,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 3798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918458386.998, "dur": 8.659, + "args": { + "External id": 253656,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 3799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918458389.751, "dur": 5.524, + "args": { + "External id": 253657,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918458422.289, "dur": 337.677, + "args": { + "External id": 253658,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 3801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918458447.166, "dur": 307.827, + "args": { + "External id": 253659,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3802, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918458457.955, "dur": 291.359, + "args": { + "External id": 253660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 3803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918458778.421, "dur": 2.470, + "args": { + "External id": 253661,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3804, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918458836.718, "dur": 6.483, + "args": { + "External id": 253662,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918458888.462, "dur": 1.287, + "args": { + "External id": 253663,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918458905.130, "dur": 1.900, + "args": { + "External id": 253664,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918458918.534, "dur": 2.522, + "args": { + "External id": 253665,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918458931.232, "dur": 0.876, + "args": { + "External id": 253666,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918458941.571, "dur": 0.749, + "args": { + "External id": 253667,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918458952.131, "dur": 0.746, + "args": { + "External id": 253668,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918458963.374, "dur": 2.272, + "args": { + "External id": 253669,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918458976.280, "dur": 0.757, + "args": { + "External id": 253670,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918459065.602, "dur": 2571.335, + "args": { + "External id": 253671,"Record function id": 0, "Ev Idx": 3814 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 4183438, "tid": 31367, + "ts": 667918459083.919, "dur": 959.549, + "args": { + "External id": 253672,"Record function id": 0, "Ev Idx": 3815 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 4183438, "tid": 31367, + "ts": 667918459099.414, "dur": 305.648, + "args": { + "External id": 253673,"Record function id": 0, "Ev Idx": 3816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918459174.832, "dur": 4.293, + "args": { + "External id": 253674,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 3817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918459182.065, "dur": 0.826, + "args": { + "External id": 253675,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 3818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918459184.498, "dur": 1.006, + "args": { + "External id": 253676,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918459187.359, "dur": 0.714, + "args": { + "External id": 253677,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918459189.451, "dur": 1.882, + "args": { + "External id": 253678,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918459210.354, "dur": 1.265, + "args": { + "External id": 253679,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918459213.830, "dur": 0.810, + "args": { + "External id": 253680,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 3823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918459216.307, "dur": 0.712, + "args": { + "External id": 253681,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918459218.629, "dur": 0.989, + "args": { + "External id": 253682,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918459221.022, "dur": 0.727, + "args": { + "External id": 253683,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918459240.630, "dur": 136.984, + "args": { + "External id": 253684,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 3827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918459256.603, "dur": 116.843, + "args": { + "External id": 253685,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 3828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918459268.885, "dur": 13.173, + "args": { + "External id": 253686,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918459284.824, "dur": 61.777, + "args": { + "External id": 253687,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 3830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918459287.261, "dur": 58.996, + "args": { + "External id": 253688,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 3831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459291.068, "dur": 5.606, + "args": { + "External id": 253689,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918459298.145, "dur": 47.636, + "args": { + "External id": 253690,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 3833 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 4183438, "tid": 31367, + "ts": 667918459482.369, "dur": 553.417, + "args": { + "External id": 253691,"Record function id": 0, "Ev Idx": 3834 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 4183438, "tid": 31367, + "ts": 667918459497.382, "dur": 526.093, + "args": { + "External id": 253692,"Record function id": 0, "Ev Idx": 3835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918459554.041, "dur": 5.652, + "args": { + "External id": 253693,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918459575.556, "dur": 25.361, + "args": { + "External id": 253694,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 3837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459580.090, "dur": 1.703, + "args": { + "External id": 253695,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459583.632, "dur": 0.542, + "args": { + "External id": 253696,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459585.434, "dur": 0.604, + "args": { + "External id": 253697,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459587.632, "dur": 0.436, + "args": { + "External id": 253698,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459589.019, "dur": 0.462, + "args": { + "External id": 253699,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459590.796, "dur": 0.455, + "args": { + "External id": 253700,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459592.290, "dur": 0.248, + "args": { + "External id": 253701,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459593.491, "dur": 1.895, + "args": { + "External id": 253702,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459596.249, "dur": 0.571, + "args": { + "External id": 253703,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918459609.876, "dur": 28.946, + "args": { + "External id": 253704,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 3847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918459709.521, "dur": 98.049, + "args": { + "External id": 253705,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 3848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918459720.130, "dur": 4.795, + "args": { + "External id": 253706,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918459730.604, "dur": 10.160, + "args": { + "External id": 253707,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 3850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918459734.857, "dur": 5.517, + "args": { + "External id": 253708,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 3851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459738.195, "dur": 0.570, + "args": { + "External id": 253709,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 3852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918459747.557, "dur": 20.242, + "args": { + "External id": 253710,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 3853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459749.903, "dur": 0.480, + "args": { + "External id": 253711,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459751.707, "dur": 0.354, + "args": { + "External id": 253712,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459753.432, "dur": 0.647, + "args": { + "External id": 253713,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459755.662, "dur": 0.619, + "args": { + "External id": 253714,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459757.488, "dur": 1.487, + "args": { + "External id": 253715,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459760.215, "dur": 0.202, + "args": { + "External id": 253716,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459761.445, "dur": 0.365, + "args": { + "External id": 253717,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459763.177, "dur": 0.311, + "args": { + "External id": 253718,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918459764.792, "dur": 0.514, + "args": { + "External id": 253719,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918459778.760, "dur": 20.937, + "args": { + "External id": 253720,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 3863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918459850.914, "dur": 108.822, + "args": { + "External id": 253721,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 3864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918459873.054, "dur": 83.480, + "args": { + "External id": 253722,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3865, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918459882.377, "dur": 69.819, + "args": { + "External id": 253723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 3866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918459972.795, "dur": 1.774, + "args": { + "External id": 253724,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3867, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918460050.524, "dur": 1568.020, + "args": { + "External id": 253725,"Sequence number": 2987542, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 3868 + } + }, + { + "ph": "f", "id": 202, "pid": 4183438, "tid": 31367, "ts": 667918460050.524, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918460167.095, "dur": 123.054, + "args": { + "External id": 253726,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 3869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918460331.992, "dur": 36.258, + "args": { + "External id": 253727,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 3870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918460384.851, "dur": 46.558, + "args": { + "External id": 253728,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 3871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918460440.260, "dur": 28.765, + "args": { + "External id": 253729,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 3872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918460474.806, "dur": 32.269, + "args": { + "External id": 253730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 3873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918460514.643, "dur": 19.803, + "args": { + "External id": 253731,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 3874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918460540.989, "dur": 28.376, + "args": { + "External id": 253732,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 3875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918460591.763, "dur": 21.167, + "args": { + "External id": 253733,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 3876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918460629.202, "dur": 71.929, + "args": { + "External id": 253734,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918460724.089, "dur": 19.847, + "args": { + "External id": 253735,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 3878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918460756.117, "dur": 14.692, + "args": { + "External id": 253736,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 3879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918460779.963, "dur": 40.922, + "args": { + "External id": 253737,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918460824.300, "dur": 30.419, + "args": { + "External id": 253738,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918460880.739, "dur": 172.051, + "args": { + "External id": 253739,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918460962.395, "dur": 5.843, + "args": { + "External id": 253740,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918460970.354, "dur": 2.558, + "args": { + "External id": 253741,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918461080.094, "dur": 24.944, + "args": { + "External id": 253742,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918461115.544, "dur": 14.452, + "args": { + "External id": 253743,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918461136.740, "dur": 43.897, + "args": { + "External id": 253744,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918461187.545, "dur": 55.216, + "args": { + "External id": 253745,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918461251.746, "dur": 29.046, + "args": { + "External id": 253746,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918461285.597, "dur": 31.796, + "args": { + "External id": 253747,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918461322.882, "dur": 26.431, + "args": { + "External id": 253748,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 3891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918461360.481, "dur": 27.559, + "args": { + "External id": 253749,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 3892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918461404.688, "dur": 22.269, + "args": { + "External id": 253750,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 3893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918461443.181, "dur": 23.909, + "args": { + "External id": 253751,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918461498.537, "dur": 24.829, + "args": { + "External id": 253752,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 3895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918461542.925, "dur": 15.835, + "args": { + "External id": 253753,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 3896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918461569.866, "dur": 18.078, + "args": { + "External id": 253754,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 3897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461696.783, "dur": 16.575, + "args": { + "External id": 253755,"Record function id": 0, "Ev Idx": 3898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461700.562, "dur": 11.533, + "args": { + "External id": 253756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918461704.991, "dur": 5.793, + "args": { + "External id": 253757,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918461706.266, "dur": 4.278, + "args": { + "External id": 253758,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461721.803, "dur": 4.593, + "args": { + "External id": 253759,"Record function id": 0, "Ev Idx": 3902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461723.293, "dur": 2.674, + "args": { + "External id": 253760,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918461724.167, "dur": 1.319, + "args": { + "External id": 253761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918461724.493, "dur": 0.898, + "args": { + "External id": 253762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461729.708, "dur": 3.897, + "args": { + "External id": 253763,"Record function id": 0, "Ev Idx": 3906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461730.949, "dur": 2.242, + "args": { + "External id": 253764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918461731.496, "dur": 1.225, + "args": { + "External id": 253765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918461732.023, "dur": 0.606, + "args": { + "External id": 253766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 3909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461736.844, "dur": 5.344, + "args": { + "External id": 253767,"Record function id": 0, "Ev Idx": 3910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461737.949, "dur": 3.820, + "args": { + "External id": 253768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918461738.614, "dur": 2.733, + "args": { + "External id": 253769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918461739.013, "dur": 2.222, + "args": { + "External id": 253770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 3913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461745.356, "dur": 4.441, + "args": { + "External id": 253771,"Record function id": 0, "Ev Idx": 3914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461746.653, "dur": 2.670, + "args": { + "External id": 253772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918461747.424, "dur": 1.504, + "args": { + "External id": 253773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918461748.047, "dur": 0.777, + "args": { + "External id": 253774,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461752.908, "dur": 3.880, + "args": { + "External id": 253775,"Record function id": 0, "Ev Idx": 3918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461754.050, "dur": 2.366, + "args": { + "External id": 253776,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918461754.831, "dur": 1.173, + "args": { + "External id": 253777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918461755.417, "dur": 0.491, + "args": { + "External id": 253778,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461760.026, "dur": 3.881, + "args": { + "External id": 253779,"Record function id": 0, "Ev Idx": 3922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461761.213, "dur": 2.246, + "args": { + "External id": 253780,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918461761.824, "dur": 1.210, + "args": { + "External id": 253781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918461762.378, "dur": 0.557, + "args": { + "External id": 253782,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461766.970, "dur": 3.730, + "args": { + "External id": 253783,"Record function id": 0, "Ev Idx": 3926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461768.118, "dur": 2.139, + "args": { + "External id": 253784,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918461768.683, "dur": 1.161, + "args": { + "External id": 253785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918461769.238, "dur": 0.503, + "args": { + "External id": 253786,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 3929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461773.722, "dur": 8.135, + "args": { + "External id": 253787,"Record function id": 0, "Ev Idx": 3930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918461774.772, "dur": 6.625, + "args": { + "External id": 253788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918461778.626, "dur": 2.368, + "args": { + "External id": 253789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918461780.286, "dur": 0.632, + "args": { + "External id": 253790,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 3933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918461785.874, "dur": 14743.275, + "args": { + "External id": 253791,"Record function id": 0, "Sequence number": 2987541, "Fwd thread id": 1, "Ev Idx": 3934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918461787.212, "dur": 14733.674, + "args": { + "External id": 253792,"Sequence number": 2987541, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 3935 + } + }, + { + "ph": "f", "id": 203, "pid": 4183438, "tid": 31367, "ts": 667918461787.212, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 4183438, "tid": 31367, + "ts": 667918461818.223, "dur": 36.515, + "args": { + "External id": 253793,"Record function id": 0, "Ev Idx": 3936 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 4183438, "tid": 31367, + "ts": 667918461861.641, "dur": 62.313, + "args": { + "External id": 253794,"Record function id": 0, "Ev Idx": 3937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 4183438, "tid": 31367, + "ts": 667918461930.426, "dur": 14582.940, + "args": { + "External id": 253795,"Record function id": 0, "Ev Idx": 3938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918462023.128, "dur": 7.022, + "args": { + "External id": 253796,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918462039.968, "dur": 5.040, + "args": { + "External id": 253797,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 3940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918462057.769, "dur": 13773.608, + "args": { + "External id": 253798,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 3941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918462074.201, "dur": 13748.513, + "args": { + "External id": 253799,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 3942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918462097.663, "dur": 14.953, + "args": { + "External id": 253800,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918462117.648, "dur": 13666.844, + "args": { + "External id": 253801,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 3944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918462120.060, "dur": 13663.718, + "args": { + "External id": 253802,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 3945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918462124.146, "dur": 4.515, + "args": { + "External id": 253803,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918462130.532, "dur": 13649.654, + "args": { + "External id": 253804,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 3947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918475921.288, "dur": 9.232, + "args": { + "External id": 253805,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 3948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918475924.203, "dur": 6.015, + "args": { + "External id": 253806,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918475962.448, "dur": 278.610, + "args": { + "External id": 253807,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 3950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918475987.980, "dur": 247.878, + "args": { + "External id": 253808,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3951, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918475998.805, "dur": 230.721, + "args": { + "External id": 253809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 3952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918476258.919, "dur": 2.423, + "args": { + "External id": 253810,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3953, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918476314.962, "dur": 6.940, + "args": { + "External id": 253811,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918476367.995, "dur": 1.549, + "args": { + "External id": 253812,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918476386.613, "dur": 1.087, + "args": { + "External id": 253813,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918476399.161, "dur": 1.829, + "args": { + "External id": 253814,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918476410.911, "dur": 0.913, + "args": { + "External id": 253815,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918476421.228, "dur": 0.808, + "args": { + "External id": 253816,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918476431.313, "dur": 0.748, + "args": { + "External id": 253817,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918476441.769, "dur": 2.188, + "args": { + "External id": 253818,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918476453.162, "dur": 0.828, + "args": { + "External id": 253819,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918476543.271, "dur": 2596.454, + "args": { + "External id": 253820,"Record function id": 0, "Ev Idx": 3963 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 4183438, "tid": 31367, + "ts": 667918476563.041, "dur": 967.888, + "args": { + "External id": 253821,"Record function id": 0, "Ev Idx": 3964 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 4183438, "tid": 31367, + "ts": 667918476579.002, "dur": 327.157, + "args": { + "External id": 253822,"Record function id": 0, "Ev Idx": 3965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918476689.917, "dur": 4.993, + "args": { + "External id": 253823,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 3966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918476698.562, "dur": 1.118, + "args": { + "External id": 253824,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 3967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918476701.499, "dur": 0.862, + "args": { + "External id": 253825,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918476703.936, "dur": 0.805, + "args": { + "External id": 253826,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918476706.377, "dur": 1.617, + "args": { + "External id": 253827,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918476709.139, "dur": 0.988, + "args": { + "External id": 253828,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 3971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918476711.565, "dur": 0.683, + "args": { + "External id": 253829,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 3972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918476713.793, "dur": 0.854, + "args": { + "External id": 253830,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918476716.633, "dur": 0.773, + "args": { + "External id": 253831,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918476719.122, "dur": 0.916, + "args": { + "External id": 253832,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 3975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918476737.991, "dur": 139.155, + "args": { + "External id": 253833,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 3976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918476754.290, "dur": 118.613, + "args": { + "External id": 253834,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 3977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918476766.535, "dur": 13.974, + "args": { + "External id": 253835,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918476783.234, "dur": 61.473, + "args": { + "External id": 253836,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 3979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918476785.864, "dur": 58.573, + "args": { + "External id": 253837,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 3980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918476789.541, "dur": 5.333, + "args": { + "External id": 253838,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918476796.750, "dur": 47.123, + "args": { + "External id": 253839,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 3982 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 4183438, "tid": 31367, + "ts": 667918476992.403, "dur": 530.111, + "args": { + "External id": 253840,"Record function id": 0, "Ev Idx": 3983 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 4183438, "tid": 31367, + "ts": 667918477008.452, "dur": 501.543, + "args": { + "External id": 253841,"Record function id": 0, "Ev Idx": 3984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918477067.978, "dur": 5.782, + "args": { + "External id": 253842,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918477089.801, "dur": 24.381, + "args": { + "External id": 253843,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 3986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477093.984, "dur": 1.468, + "args": { + "External id": 253844,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477097.046, "dur": 0.584, + "args": { + "External id": 253845,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477099.294, "dur": 0.561, + "args": { + "External id": 253846,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477101.098, "dur": 0.609, + "args": { + "External id": 253847,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477103.172, "dur": 0.339, + "args": { + "External id": 253848,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477104.460, "dur": 0.297, + "args": { + "External id": 253849,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477106.015, "dur": 0.285, + "args": { + "External id": 253850,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477107.456, "dur": 1.828, + "args": { + "External id": 253851,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477110.682, "dur": 0.402, + "args": { + "External id": 253852,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 3995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918477123.533, "dur": 29.366, + "args": { + "External id": 253853,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 3996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918477181.146, "dur": 114.406, + "args": { + "External id": 253854,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 3997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918477189.901, "dur": 22.380, + "args": { + "External id": 253855,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918477219.647, "dur": 10.361, + "args": { + "External id": 253856,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 3999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918477223.883, "dur": 5.691, + "args": { + "External id": 253857,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 4000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477227.349, "dur": 0.667, + "args": { + "External id": 253858,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 4001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918477237.123, "dur": 20.412, + "args": { + "External id": 253859,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 4002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477239.078, "dur": 0.440, + "args": { + "External id": 253860,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477240.781, "dur": 0.315, + "args": { + "External id": 253861,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477242.400, "dur": 0.290, + "args": { + "External id": 253862,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477243.663, "dur": 0.212, + "args": { + "External id": 253863,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477245.412, "dur": 1.408, + "args": { + "External id": 253864,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477248.225, "dur": 0.288, + "args": { + "External id": 253865,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477249.909, "dur": 0.219, + "args": { + "External id": 253866,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477251.374, "dur": 0.361, + "args": { + "External id": 253867,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918477253.010, "dur": 0.373, + "args": { + "External id": 253868,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918477267.678, "dur": 20.526, + "args": { + "External id": 253869,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 4012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918477339.433, "dur": 108.893, + "args": { + "External id": 253870,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 4013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918477361.916, "dur": 83.015, + "args": { + "External id": 253871,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4014, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918477371.109, "dur": 69.883, + "args": { + "External id": 253872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 4015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918477461.263, "dur": 1.757, + "args": { + "External id": 253873,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4016, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918477538.016, "dur": 1579.589, + "args": { + "External id": 253874,"Sequence number": 2987540, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 4017 + } + }, + { + "ph": "f", "id": 204, "pid": 4183438, "tid": 31367, "ts": 667918477538.016, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918477690.708, "dur": 105.361, + "args": { + "External id": 253875,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 4018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918477838.860, "dur": 38.112, + "args": { + "External id": 253876,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 4019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918477897.002, "dur": 41.561, + "args": { + "External id": 253877,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 4020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918477950.858, "dur": 27.037, + "args": { + "External id": 253878,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 4021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918477984.139, "dur": 32.990, + "args": { + "External id": 253879,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 4022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918478023.584, "dur": 20.256, + "args": { + "External id": 253880,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 4023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918478050.855, "dur": 29.888, + "args": { + "External id": 253881,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 4024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918478101.554, "dur": 24.791, + "args": { + "External id": 253882,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 4025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918478143.267, "dur": 30.430, + "args": { + "External id": 253883,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918478191.450, "dur": 37.811, + "args": { + "External id": 253884,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 4027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918478245.037, "dur": 14.448, + "args": { + "External id": 253885,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 4028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918478268.810, "dur": 39.082, + "args": { + "External id": 253886,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 4029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918478311.593, "dur": 31.084, + "args": { + "External id": 253887,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918478369.997, "dur": 162.069, + "args": { + "External id": 253888,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918478444.719, "dur": 5.899, + "args": { + "External id": 253889,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918478452.224, "dur": 2.731, + "args": { + "External id": 253890,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918478562.020, "dur": 24.635, + "args": { + "External id": 253891,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918478597.334, "dur": 13.773, + "args": { + "External id": 253892,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918478618.473, "dur": 81.644, + "args": { + "External id": 253893,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 4036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918478708.547, "dur": 38.510, + "args": { + "External id": 253894,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918478754.669, "dur": 27.923, + "args": { + "External id": 253895,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 4038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918478787.217, "dur": 28.594, + "args": { + "External id": 253896,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918478821.834, "dur": 26.972, + "args": { + "External id": 253897,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 4040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918478858.520, "dur": 27.318, + "args": { + "External id": 253898,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918478903.927, "dur": 24.840, + "args": { + "External id": 253899,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 4042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918478945.333, "dur": 23.124, + "args": { + "External id": 253900,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918478985.457, "dur": 16.568, + "args": { + "External id": 253901,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 4044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918479022.659, "dur": 26.419, + "args": { + "External id": 253902,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 4045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918479066.648, "dur": 16.002, + "args": { + "External id": 253903,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 4046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479164.831, "dur": 14.715, + "args": { + "External id": 253904,"Record function id": 0, "Ev Idx": 4047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479167.984, "dur": 10.717, + "args": { + "External id": 253905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918479171.927, "dur": 5.836, + "args": { + "External id": 253906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918479173.299, "dur": 4.372, + "args": { + "External id": 253907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479183.351, "dur": 5.437, + "args": { + "External id": 253908,"Record function id": 0, "Ev Idx": 4051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479185.262, "dur": 3.049, + "args": { + "External id": 253909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918479186.015, "dur": 1.842, + "args": { + "External id": 253910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918479186.637, "dur": 1.146, + "args": { + "External id": 253911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479214.557, "dur": 7.780, + "args": { + "External id": 253912,"Record function id": 0, "Ev Idx": 4055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479217.335, "dur": 4.303, + "args": { + "External id": 253913,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918479218.407, "dur": 2.419, + "args": { + "External id": 253914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918479219.060, "dur": 1.528, + "args": { + "External id": 253915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479226.514, "dur": 3.877, + "args": { + "External id": 253916,"Record function id": 0, "Ev Idx": 4059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479227.748, "dur": 2.198, + "args": { + "External id": 253917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 4060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918479228.294, "dur": 1.227, + "args": { + "External id": 253918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 4061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918479228.676, "dur": 0.747, + "args": { + "External id": 253919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 4062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479233.452, "dur": 4.423, + "args": { + "External id": 253920,"Record function id": 0, "Ev Idx": 4063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479234.943, "dur": 2.468, + "args": { + "External id": 253921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918479235.624, "dur": 1.377, + "args": { + "External id": 253922,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918479236.211, "dur": 0.690, + "args": { + "External id": 253923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479240.872, "dur": 4.373, + "args": { + "External id": 253924,"Record function id": 0, "Ev Idx": 4067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479242.290, "dur": 2.511, + "args": { + "External id": 253925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918479242.921, "dur": 1.467, + "args": { + "External id": 253926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918479243.528, "dur": 0.756, + "args": { + "External id": 253927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479248.406, "dur": 3.814, + "args": { + "External id": 253928,"Record function id": 0, "Ev Idx": 4071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479249.783, "dur": 2.023, + "args": { + "External id": 253929,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918479250.237, "dur": 1.154, + "args": { + "External id": 253930,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918479250.578, "dur": 0.711, + "args": { + "External id": 253931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479255.220, "dur": 4.552, + "args": { + "External id": 253932,"Record function id": 0, "Ev Idx": 4075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479256.797, "dur": 2.564, + "args": { + "External id": 253933,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918479257.317, "dur": 1.638, + "args": { + "External id": 253934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918479258.009, "dur": 0.868, + "args": { + "External id": 253935,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479266.875, "dur": 3.644, + "args": { + "External id": 253936,"Record function id": 0, "Ev Idx": 4079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918479268.005, "dur": 2.096, + "args": { + "External id": 253937,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918479268.532, "dur": 1.156, + "args": { + "External id": 253938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918479268.879, "dur": 0.706, + "args": { + "External id": 253939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918479274.763, "dur": 14908.372, + "args": { + "External id": 253940,"Record function id": 0, "Sequence number": 2987539, "Fwd thread id": 1, "Ev Idx": 4083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918479275.892, "dur": 14898.242, + "args": { + "External id": 253941,"Sequence number": 2987539, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 4084 + } + }, + { + "ph": "f", "id": 205, "pid": 4183438, "tid": 31367, "ts": 667918479275.892, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 4183438, "tid": 31367, + "ts": 667918479306.621, "dur": 44.166, + "args": { + "External id": 253942,"Record function id": 0, "Ev Idx": 4085 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 4183438, "tid": 31367, + "ts": 667918479358.766, "dur": 67.825, + "args": { + "External id": 253943,"Record function id": 0, "Ev Idx": 4086 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 4183438, "tid": 31367, + "ts": 667918479432.565, "dur": 14734.156, + "args": { + "External id": 253944,"Record function id": 0, "Ev Idx": 4087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918479525.253, "dur": 7.364, + "args": { + "External id": 253945,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918479541.523, "dur": 4.688, + "args": { + "External id": 253946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 4089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918479559.621, "dur": 13906.472, + "args": { + "External id": 253947,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 4090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918479576.165, "dur": 13881.151, + "args": { + "External id": 253948,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 4091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918479598.629, "dur": 14.086, + "args": { + "External id": 253949,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918479617.804, "dur": 13805.077, + "args": { + "External id": 253950,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 4093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918479620.489, "dur": 13801.647, + "args": { + "External id": 253951,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 4094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918479624.407, "dur": 5.003, + "args": { + "External id": 253952,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918479648.691, "dur": 13769.982, + "args": { + "External id": 253953,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 4096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918493558.160, "dur": 9.249, + "args": { + "External id": 253954,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 4097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918493560.957, "dur": 6.074, + "args": { + "External id": 253955,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918493596.179, "dur": 291.781, + "args": { + "External id": 253956,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 4099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918493623.215, "dur": 259.795, + "args": { + "External id": 253957,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4100, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918493634.350, "dur": 243.323, + "args": { + "External id": 253958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 4101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918493906.195, "dur": 1.864, + "args": { + "External id": 253959,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4102, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918493963.820, "dur": 6.485, + "args": { + "External id": 253960,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494014.450, "dur": 1.457, + "args": { + "External id": 253961,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494031.686, "dur": 1.257, + "args": { + "External id": 253962,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494045.028, "dur": 1.391, + "args": { + "External id": 253963,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494057.454, "dur": 1.131, + "args": { + "External id": 253964,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494068.407, "dur": 1.417, + "args": { + "External id": 253965,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494083.319, "dur": 1.220, + "args": { + "External id": 253966,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494096.331, "dur": 0.853, + "args": { + "External id": 253967,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494106.389, "dur": 1.120, + "args": { + "External id": 253968,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918494215.911, "dur": 2715.135, + "args": { + "External id": 253969,"Record function id": 0, "Ev Idx": 4112 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 4183438, "tid": 31367, + "ts": 667918494239.155, "dur": 1073.893, + "args": { + "External id": 253970,"Record function id": 0, "Ev Idx": 4113 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 4183438, "tid": 31367, + "ts": 667918494254.951, "dur": 296.714, + "args": { + "External id": 253971,"Record function id": 0, "Ev Idx": 4114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918494336.550, "dur": 4.714, + "args": { + "External id": 253972,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 4115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918494344.608, "dur": 0.812, + "args": { + "External id": 253973,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918494347.519, "dur": 1.279, + "args": { + "External id": 253974,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918494350.502, "dur": 0.819, + "args": { + "External id": 253975,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918494352.961, "dur": 1.149, + "args": { + "External id": 253976,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918494355.906, "dur": 1.086, + "args": { + "External id": 253977,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918494358.590, "dur": 1.340, + "args": { + "External id": 253978,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918494361.502, "dur": 0.918, + "args": { + "External id": 253979,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918494363.793, "dur": 1.177, + "args": { + "External id": 253980,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918494366.559, "dur": 1.011, + "args": { + "External id": 253981,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918494384.782, "dur": 138.334, + "args": { + "External id": 253982,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 4125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918494400.200, "dur": 118.594, + "args": { + "External id": 253983,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 4126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918494413.278, "dur": 13.483, + "args": { + "External id": 253984,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918494429.571, "dur": 63.609, + "args": { + "External id": 253985,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 4128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918494432.209, "dur": 60.674, + "args": { + "External id": 253986,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 4129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494436.095, "dur": 6.454, + "args": { + "External id": 253987,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918494444.394, "dur": 47.910, + "args": { + "External id": 253988,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 4131 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 4183438, "tid": 31367, + "ts": 667918494636.928, "dur": 668.357, + "args": { + "External id": 253989,"Record function id": 0, "Ev Idx": 4132 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 4183438, "tid": 31367, + "ts": 667918494694.888, "dur": 597.857, + "args": { + "External id": 253990,"Record function id": 0, "Ev Idx": 4133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918494763.862, "dur": 6.562, + "args": { + "External id": 253991,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918494786.246, "dur": 30.556, + "args": { + "External id": 253992,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 4135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494790.696, "dur": 1.960, + "args": { + "External id": 253993,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494795.475, "dur": 0.503, + "args": { + "External id": 253994,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494797.134, "dur": 0.966, + "args": { + "External id": 253995,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494800.370, "dur": 0.543, + "args": { + "External id": 253996,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494802.104, "dur": 0.710, + "args": { + "External id": 253997,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494804.053, "dur": 0.956, + "args": { + "External id": 253998,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494809.464, "dur": 0.506, + "args": { + "External id": 253999,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494810.626, "dur": 0.704, + "args": { + "External id": 254000,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494813.030, "dur": 0.730, + "args": { + "External id": 254001,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918494827.030, "dur": 34.278, + "args": { + "External id": 254002,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 4145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918494890.863, "dur": 97.070, + "args": { + "External id": 254003,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 4146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918494901.090, "dur": 3.441, + "args": { + "External id": 254004,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918494909.733, "dur": 10.123, + "args": { + "External id": 254005,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 4148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918494914.148, "dur": 5.316, + "args": { + "External id": 254006,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 4149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494917.748, "dur": 0.644, + "args": { + "External id": 254007,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 4150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918494927.422, "dur": 24.709, + "args": { + "External id": 254008,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 4151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494929.494, "dur": 0.667, + "args": { + "External id": 254009,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494931.625, "dur": 0.936, + "args": { + "External id": 254010,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494933.815, "dur": 0.860, + "args": { + "External id": 254011,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494935.905, "dur": 0.878, + "args": { + "External id": 254012,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494937.885, "dur": 0.933, + "args": { + "External id": 254013,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494939.833, "dur": 0.910, + "args": { + "External id": 254014,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494942.271, "dur": 0.545, + "args": { + "External id": 254015,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494943.958, "dur": 0.637, + "args": { + "External id": 254016,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918494946.025, "dur": 0.664, + "args": { + "External id": 254017,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918494962.233, "dur": 18.276, + "args": { + "External id": 254018,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 4161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918495033.151, "dur": 183.529, + "args": { + "External id": 254019,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 4162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918495084.952, "dur": 127.081, + "args": { + "External id": 254020,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4163, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918495096.876, "dur": 92.381, + "args": { + "External id": 254021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 4164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918495231.997, "dur": 2.137, + "args": { + "External id": 254022,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4165, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918495320.716, "dur": 1591.597, + "args": { + "External id": 254023,"Sequence number": 2987538, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 4166 + } + }, + { + "ph": "f", "id": 206, "pid": 4183438, "tid": 31367, "ts": 667918495320.716, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918495434.316, "dur": 107.993, + "args": { + "External id": 254024,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 4167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918495580.818, "dur": 36.426, + "args": { + "External id": 254025,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 4168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918495633.014, "dur": 82.178, + "args": { + "External id": 254026,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 4169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918495728.458, "dur": 31.855, + "args": { + "External id": 254027,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 4170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918495766.316, "dur": 33.682, + "args": { + "External id": 254028,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 4171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918495806.487, "dur": 23.813, + "args": { + "External id": 254029,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 4172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918495837.720, "dur": 28.848, + "args": { + "External id": 254030,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 4173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918495890.598, "dur": 23.573, + "args": { + "External id": 254031,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 4174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918495937.365, "dur": 26.839, + "args": { + "External id": 254032,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918495983.690, "dur": 15.933, + "args": { + "External id": 254033,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 4176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918496011.427, "dur": 13.578, + "args": { + "External id": 254034,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 4177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918496032.727, "dur": 36.606, + "args": { + "External id": 254035,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 4178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918496072.310, "dur": 29.407, + "args": { + "External id": 254036,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918496128.709, "dur": 191.088, + "args": { + "External id": 254037,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918496221.050, "dur": 6.731, + "args": { + "External id": 254038,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918496229.747, "dur": 3.555, + "args": { + "External id": 254039,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918496349.985, "dur": 23.317, + "args": { + "External id": 254040,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918496384.697, "dur": 14.517, + "args": { + "External id": 254041,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918496407.095, "dur": 46.753, + "args": { + "External id": 254042,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 4185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918496459.763, "dur": 35.750, + "args": { + "External id": 254043,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918496502.183, "dur": 27.353, + "args": { + "External id": 254044,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 4187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918496534.129, "dur": 30.858, + "args": { + "External id": 254045,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918496570.369, "dur": 26.525, + "args": { + "External id": 254046,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 4189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918496602.675, "dur": 31.285, + "args": { + "External id": 254047,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918496686.693, "dur": 42.776, + "args": { + "External id": 254048,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 4191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918496757.025, "dur": 30.154, + "args": { + "External id": 254049,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918496801.398, "dur": 17.926, + "args": { + "External id": 254050,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 4193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918496835.400, "dur": 15.612, + "args": { + "External id": 254051,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 4194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918496862.530, "dur": 18.185, + "args": { + "External id": 254052,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 4195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918496956.158, "dur": 16.465, + "args": { + "External id": 254053,"Record function id": 0, "Ev Idx": 4196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918496959.607, "dur": 12.052, + "args": { + "External id": 254054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918496964.210, "dur": 6.630, + "args": { + "External id": 254055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918496965.709, "dur": 5.021, + "args": { + "External id": 254056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918496976.847, "dur": 5.396, + "args": { + "External id": 254057,"Record function id": 0, "Ev Idx": 4200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918496978.261, "dur": 3.482, + "args": { + "External id": 254058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918496979.212, "dur": 2.115, + "args": { + "External id": 254059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918496979.967, "dur": 1.265, + "args": { + "External id": 254060,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918496985.573, "dur": 4.419, + "args": { + "External id": 254061,"Record function id": 0, "Ev Idx": 4204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918496986.872, "dur": 2.726, + "args": { + "External id": 254062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918496987.668, "dur": 1.542, + "args": { + "External id": 254063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918496988.307, "dur": 0.829, + "args": { + "External id": 254064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918496993.234, "dur": 4.571, + "args": { + "External id": 254065,"Record function id": 0, "Ev Idx": 4208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918496994.531, "dur": 2.907, + "args": { + "External id": 254066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 4209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918496995.454, "dur": 1.567, + "args": { + "External id": 254067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 4210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918496996.114, "dur": 0.795, + "args": { + "External id": 254068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 4211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918497001.039, "dur": 7.322, + "args": { + "External id": 254069,"Record function id": 0, "Ev Idx": 4212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918497002.027, "dur": 5.934, + "args": { + "External id": 254070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918497006.032, "dur": 1.552, + "args": { + "External id": 254071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918497006.437, "dur": 1.067, + "args": { + "External id": 254072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918497011.633, "dur": 4.131, + "args": { + "External id": 254073,"Record function id": 0, "Ev Idx": 4216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918497012.744, "dur": 2.627, + "args": { + "External id": 254074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918497013.208, "dur": 1.778, + "args": { + "External id": 254075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918497013.994, "dur": 0.913, + "args": { + "External id": 254076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918497018.885, "dur": 4.183, + "args": { + "External id": 254077,"Record function id": 0, "Ev Idx": 4220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918497020.057, "dur": 2.618, + "args": { + "External id": 254078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918497020.750, "dur": 1.529, + "args": { + "External id": 254079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918497021.365, "dur": 0.818, + "args": { + "External id": 254080,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918497026.060, "dur": 4.367, + "args": { + "External id": 254081,"Record function id": 0, "Ev Idx": 4224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918497027.366, "dur": 2.656, + "args": { + "External id": 254082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918497028.003, "dur": 1.624, + "args": { + "External id": 254083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918497028.643, "dur": 0.880, + "args": { + "External id": 254084,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918497033.631, "dur": 3.781, + "args": { + "External id": 254085,"Record function id": 0, "Ev Idx": 4228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918497034.589, "dur": 2.443, + "args": { + "External id": 254086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918497035.088, "dur": 1.561, + "args": { + "External id": 254087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918497035.578, "dur": 0.968, + "args": { + "External id": 254088,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918497041.109, "dur": 14837.664, + "args": { + "External id": 254089,"Record function id": 0, "Sequence number": 2987537, "Fwd thread id": 1, "Ev Idx": 4232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918497042.321, "dur": 14827.655, + "args": { + "External id": 254090,"Sequence number": 2987537, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 4233 + } + }, + { + "ph": "f", "id": 207, "pid": 4183438, "tid": 31367, "ts": 667918497042.321, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 4183438, "tid": 31367, + "ts": 667918497074.760, "dur": 37.168, + "args": { + "External id": 254091,"Record function id": 0, "Ev Idx": 4234 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 4183438, "tid": 31367, + "ts": 667918497118.986, "dur": 65.993, + "args": { + "External id": 254092,"Record function id": 0, "Ev Idx": 4235 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 4183438, "tid": 31367, + "ts": 667918497214.676, "dur": 14647.427, + "args": { + "External id": 254093,"Record function id": 0, "Ev Idx": 4236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918497305.581, "dur": 7.112, + "args": { + "External id": 254094,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918497322.644, "dur": 5.383, + "args": { + "External id": 254095,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 4238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918497341.427, "dur": 13769.234, + "args": { + "External id": 254096,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 4239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918497354.542, "dur": 13747.357, + "args": { + "External id": 254097,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 4240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918497383.635, "dur": 18.036, + "args": { + "External id": 254098,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918497406.524, "dur": 13658.495, + "args": { + "External id": 254099,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 4242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918497409.027, "dur": 13654.745, + "args": { + "External id": 254100,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 4243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918497412.637, "dur": 5.683, + "args": { + "External id": 254101,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918497419.997, "dur": 13639.901, + "args": { + "External id": 254102,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 4245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918511211.748, "dur": 10.671, + "args": { + "External id": 254103,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 4246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918511214.732, "dur": 7.123, + "args": { + "External id": 254104,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918511252.756, "dur": 299.803, + "args": { + "External id": 254105,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 4248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918511278.075, "dur": 269.731, + "args": { + "External id": 254106,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4249, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918511291.052, "dur": 251.737, + "args": { + "External id": 254107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 4250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918511568.536, "dur": 2.209, + "args": { + "External id": 254108,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4251, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918511623.717, "dur": 7.123, + "args": { + "External id": 254109,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918511710.297, "dur": 2.632, + "args": { + "External id": 254110,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918511729.701, "dur": 1.442, + "args": { + "External id": 254111,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918511743.498, "dur": 1.143, + "args": { + "External id": 254112,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918511756.001, "dur": 1.178, + "args": { + "External id": 254113,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918511766.833, "dur": 1.154, + "args": { + "External id": 254114,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918511779.529, "dur": 1.237, + "args": { + "External id": 254115,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918511790.999, "dur": 1.196, + "args": { + "External id": 254116,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918511801.721, "dur": 1.089, + "args": { + "External id": 254117,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918511895.512, "dur": 2646.751, + "args": { + "External id": 254118,"Record function id": 0, "Ev Idx": 4261 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 4183438, "tid": 31367, + "ts": 667918511914.827, "dur": 1012.092, + "args": { + "External id": 254119,"Record function id": 0, "Ev Idx": 4262 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 4183438, "tid": 31367, + "ts": 667918511929.104, "dur": 326.435, + "args": { + "External id": 254120,"Record function id": 0, "Ev Idx": 4263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918512012.665, "dur": 4.371, + "args": { + "External id": 254121,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 4264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918512020.473, "dur": 1.348, + "args": { + "External id": 254122,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918512023.597, "dur": 1.121, + "args": { + "External id": 254123,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918512026.537, "dur": 1.274, + "args": { + "External id": 254124,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918512029.335, "dur": 0.849, + "args": { + "External id": 254125,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918512031.772, "dur": 0.928, + "args": { + "External id": 254126,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918512034.301, "dur": 0.910, + "args": { + "External id": 254127,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918512036.906, "dur": 1.218, + "args": { + "External id": 254128,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918512039.818, "dur": 1.260, + "args": { + "External id": 254129,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918512043.078, "dur": 1.085, + "args": { + "External id": 254130,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918512061.638, "dur": 162.087, + "args": { + "External id": 254131,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 4274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918512077.870, "dur": 140.650, + "args": { + "External id": 254132,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 4275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918512090.720, "dur": 13.434, + "args": { + "External id": 254133,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918512107.214, "dur": 65.656, + "args": { + "External id": 254134,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 4277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918512111.436, "dur": 61.115, + "args": { + "External id": 254135,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 4278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512115.437, "dur": 5.743, + "args": { + "External id": 254136,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918512124.754, "dur": 47.155, + "args": { + "External id": 254137,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 4280 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 4183438, "tid": 31367, + "ts": 667918512345.293, "dur": 574.560, + "args": { + "External id": 254138,"Record function id": 0, "Ev Idx": 4281 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 4183438, "tid": 31367, + "ts": 667918512360.672, "dur": 546.875, + "args": { + "External id": 254139,"Record function id": 0, "Ev Idx": 4282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918512424.596, "dur": 5.714, + "args": { + "External id": 254140,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918512445.549, "dur": 27.097, + "args": { + "External id": 254141,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 4284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512450.026, "dur": 1.723, + "args": { + "External id": 254142,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512454.613, "dur": 0.654, + "args": { + "External id": 254143,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512456.448, "dur": 0.609, + "args": { + "External id": 254144,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512458.228, "dur": 0.785, + "args": { + "External id": 254145,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512461.056, "dur": 0.502, + "args": { + "External id": 254146,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512463.270, "dur": 0.813, + "args": { + "External id": 254147,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512464.989, "dur": 1.084, + "args": { + "External id": 254148,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512467.238, "dur": 0.778, + "args": { + "External id": 254149,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512469.252, "dur": 0.659, + "args": { + "External id": 254150,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918512482.770, "dur": 29.198, + "args": { + "External id": 254151,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 4294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 31367, + "ts": 667918512544.017, "dur": 98.131, + "args": { + "External id": 254152,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 4295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918512553.993, "dur": 3.375, + "args": { + "External id": 254153,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 31367, + "ts": 667918512562.634, "dur": 9.581, + "args": { + "External id": 254154,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 4297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 31367, + "ts": 667918512566.886, "dur": 4.938, + "args": { + "External id": 254155,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 4298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512569.952, "dur": 0.711, + "args": { + "External id": 254156,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 4299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 31367, + "ts": 667918512578.572, "dur": 28.604, + "args": { + "External id": 254157,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 4300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512580.924, "dur": 2.186, + "args": { + "External id": 254158,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512584.211, "dur": 0.558, + "args": { + "External id": 254159,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512586.113, "dur": 0.770, + "args": { + "External id": 254160,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512589.318, "dur": 0.638, + "args": { + "External id": 254161,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512591.352, "dur": 0.910, + "args": { + "External id": 254162,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512593.396, "dur": 0.543, + "args": { + "External id": 254163,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512596.008, "dur": 0.801, + "args": { + "External id": 254164,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512597.955, "dur": 0.739, + "args": { + "External id": 254165,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918512599.852, "dur": 1.056, + "args": { + "External id": 254166,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 31367, + "ts": 667918512617.539, "dur": 17.565, + "args": { + "External id": 254167,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 4310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 31367, + "ts": 667918512727.960, "dur": 115.175, + "args": { + "External id": 254168,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 4311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918512752.793, "dur": 86.701, + "args": { + "External id": 254169,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4312, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 31367, + "ts": 667918512762.418, "dur": 72.873, + "args": { + "External id": 254170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 4313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918512856.630, "dur": 1.914, + "args": { + "External id": 254171,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4314, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918512934.314, "dur": 1587.751, + "args": { + "External id": 254172,"Sequence number": 2987536, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 4315 + } + }, + { + "ph": "f", "id": 208, "pid": 4183438, "tid": 31367, "ts": 667918512934.314, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918513043.330, "dur": 102.248, + "args": { + "External id": 254173,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 4316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918513184.781, "dur": 59.179, + "args": { + "External id": 254174,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 4317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918513263.590, "dur": 48.254, + "args": { + "External id": 254175,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 4318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918513322.227, "dur": 26.919, + "args": { + "External id": 254176,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 4319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918513355.095, "dur": 36.849, + "args": { + "External id": 254177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 4320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918513398.977, "dur": 21.897, + "args": { + "External id": 254178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 4321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918513427.672, "dur": 29.601, + "args": { + "External id": 254179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 4322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918513480.294, "dur": 26.723, + "args": { + "External id": 254180,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 4323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918513523.613, "dur": 31.005, + "args": { + "External id": 254181,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918513571.083, "dur": 18.400, + "args": { + "External id": 254182,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 4325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918513601.826, "dur": 15.317, + "args": { + "External id": 254183,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 4326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918513625.160, "dur": 72.745, + "args": { + "External id": 254184,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 4327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918513703.884, "dur": 35.401, + "args": { + "External id": 254185,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918513769.373, "dur": 176.245, + "args": { + "External id": 254186,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918513847.744, "dur": 6.567, + "args": { + "External id": 254187,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918513856.011, "dur": 3.166, + "args": { + "External id": 254188,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918513976.148, "dur": 24.517, + "args": { + "External id": 254189,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918514011.440, "dur": 15.856, + "args": { + "External id": 254190,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918514034.966, "dur": 51.736, + "args": { + "External id": 254191,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 4334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918514092.773, "dur": 32.752, + "args": { + "External id": 254192,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918514131.817, "dur": 27.527, + "args": { + "External id": 254193,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 4336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918514167.017, "dur": 45.629, + "args": { + "External id": 254194,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918514221.614, "dur": 31.157, + "args": { + "External id": 254195,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 4338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918514260.617, "dur": 30.338, + "args": { + "External id": 254196,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918514317.723, "dur": 33.844, + "args": { + "External id": 254197,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 4340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918514371.008, "dur": 27.219, + "args": { + "External id": 254198,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918514412.014, "dur": 16.476, + "args": { + "External id": 254199,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 4342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918514443.484, "dur": 18.127, + "args": { + "External id": 254200,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 4343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918514473.021, "dur": 16.354, + "args": { + "External id": 254201,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 4344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514565.037, "dur": 19.522, + "args": { + "External id": 254202,"Record function id": 0, "Ev Idx": 4345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514568.682, "dur": 15.047, + "args": { + "External id": 254203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918514576.239, "dur": 6.655, + "args": { + "External id": 254204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918514577.890, "dur": 4.908, + "args": { + "External id": 254205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514588.482, "dur": 5.053, + "args": { + "External id": 254206,"Record function id": 0, "Ev Idx": 4349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514590.204, "dur": 2.878, + "args": { + "External id": 254207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918514590.917, "dur": 1.741, + "args": { + "External id": 254208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918514591.522, "dur": 1.045, + "args": { + "External id": 254209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514596.750, "dur": 4.620, + "args": { + "External id": 254210,"Record function id": 0, "Ev Idx": 4353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514598.097, "dur": 2.898, + "args": { + "External id": 254211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918514598.600, "dur": 2.003, + "args": { + "External id": 254212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918514599.458, "dur": 1.056, + "args": { + "External id": 254213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514604.419, "dur": 3.539, + "args": { + "External id": 254214,"Record function id": 0, "Ev Idx": 4357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514605.394, "dur": 2.181, + "args": { + "External id": 254215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 4358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918514606.038, "dur": 1.148, + "args": { + "External id": 254216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 4359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918514606.334, "dur": 0.754, + "args": { + "External id": 254217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 4360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514611.027, "dur": 5.527, + "args": { + "External id": 254218,"Record function id": 0, "Ev Idx": 4361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514612.650, "dur": 3.482, + "args": { + "External id": 254219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918514613.437, "dur": 2.300, + "args": { + "External id": 254220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918514614.420, "dur": 1.187, + "args": { + "External id": 254221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514619.813, "dur": 4.773, + "args": { + "External id": 254222,"Record function id": 0, "Ev Idx": 4365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514621.491, "dur": 2.706, + "args": { + "External id": 254223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918514622.148, "dur": 1.635, + "args": { + "External id": 254224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918514622.713, "dur": 0.973, + "args": { + "External id": 254225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514627.755, "dur": 4.581, + "args": { + "External id": 254226,"Record function id": 0, "Ev Idx": 4369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514629.217, "dur": 2.722, + "args": { + "External id": 254227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918514629.933, "dur": 1.544, + "args": { + "External id": 254228,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918514630.250, "dur": 1.129, + "args": { + "External id": 254229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514635.358, "dur": 3.766, + "args": { + "External id": 254230,"Record function id": 0, "Ev Idx": 4373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514636.649, "dur": 2.086, + "args": { + "External id": 254231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918514637.121, "dur": 1.222, + "args": { + "External id": 254232,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918514637.451, "dur": 0.799, + "args": { + "External id": 254233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514642.323, "dur": 3.487, + "args": { + "External id": 254234,"Record function id": 0, "Ev Idx": 4377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918514643.250, "dur": 2.161, + "args": { + "External id": 254235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918514643.704, "dur": 1.321, + "args": { + "External id": 254236,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918514644.028, "dur": 0.905, + "args": { + "External id": 254237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918514649.753, "dur": 14958.560, + "args": { + "External id": 254238,"Record function id": 0, "Sequence number": 2987535, "Fwd thread id": 1, "Ev Idx": 4381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918514686.423, "dur": 14912.983, + "args": { + "External id": 254239,"Sequence number": 2987535, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 4382 + } + }, + { + "ph": "f", "id": 209, "pid": 4183438, "tid": 31367, "ts": 667918514686.423, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 4183438, "tid": 31367, + "ts": 667918514718.362, "dur": 43.906, + "args": { + "External id": 254240,"Record function id": 0, "Ev Idx": 4383 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 4183438, "tid": 31367, + "ts": 667918514770.045, "dur": 73.710, + "args": { + "External id": 254241,"Record function id": 0, "Ev Idx": 4384 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 4183438, "tid": 31367, + "ts": 667918514850.484, "dur": 14740.941, + "args": { + "External id": 254242,"Record function id": 0, "Ev Idx": 4385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918514943.909, "dur": 7.207, + "args": { + "External id": 254243,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918514960.814, "dur": 5.235, + "args": { + "External id": 254244,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 4387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918514979.358, "dur": 13939.606, + "args": { + "External id": 254245,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 4388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918514995.988, "dur": 13914.610, + "args": { + "External id": 254246,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 4389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918515021.013, "dur": 14.782, + "args": { + "External id": 254247,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918515040.091, "dur": 13834.260, + "args": { + "External id": 254248,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 4391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918515043.547, "dur": 13830.039, + "args": { + "External id": 254249,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 4392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918515047.378, "dur": 4.605, + "args": { + "External id": 254250,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918515053.836, "dur": 13816.297, + "args": { + "External id": 254251,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 4394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918529007.674, "dur": 9.660, + "args": { + "External id": 254252,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 4395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918529010.542, "dur": 6.428, + "args": { + "External id": 254253,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918529045.718, "dur": 267.632, + "args": { + "External id": 254254,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 4397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918529071.360, "dur": 237.131, + "args": { + "External id": 254255,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4398, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918529082.418, "dur": 221.053, + "args": { + "External id": 254256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 4399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918529331.492, "dur": 2.170, + "args": { + "External id": 254257,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4400, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918529387.729, "dur": 6.632, + "args": { + "External id": 254258,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918529439.030, "dur": 1.504, + "args": { + "External id": 254259,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918529455.359, "dur": 1.172, + "args": { + "External id": 254260,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918529468.281, "dur": 1.165, + "args": { + "External id": 254261,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918529480.341, "dur": 1.221, + "args": { + "External id": 254262,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918529491.782, "dur": 1.146, + "args": { + "External id": 254263,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918529504.599, "dur": 1.265, + "args": { + "External id": 254264,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918529517.288, "dur": 1.004, + "args": { + "External id": 254265,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918529529.323, "dur": 1.220, + "args": { + "External id": 254266,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918529621.888, "dur": 2086.317, + "args": { + "External id": 254267,"Record function id": 0, "Ev Idx": 4410 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 4183438, "tid": 31367, + "ts": 667918529642.386, "dur": 471.509, + "args": { + "External id": 254268,"Record function id": 0, "Ev Idx": 4411 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 4183438, "tid": 31367, + "ts": 667918529696.276, "dur": 315.080, + "args": { + "External id": 254269,"Record function id": 0, "Ev Idx": 4412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918529790.599, "dur": 5.246, + "args": { + "External id": 254270,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 4413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918529799.526, "dur": 1.245, + "args": { + "External id": 254271,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918529802.665, "dur": 0.953, + "args": { + "External id": 254272,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918529805.521, "dur": 1.432, + "args": { + "External id": 254273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918529808.305, "dur": 1.169, + "args": { + "External id": 254274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918529811.120, "dur": 0.987, + "args": { + "External id": 254275,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918529813.778, "dur": 1.292, + "args": { + "External id": 254276,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918529816.780, "dur": 0.959, + "args": { + "External id": 254277,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918529819.388, "dur": 1.042, + "args": { + "External id": 254278,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918529821.863, "dur": 1.360, + "args": { + "External id": 254279,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918529840.029, "dur": 142.785, + "args": { + "External id": 254280,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 4423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 31367, + "ts": 667918529856.255, "dur": 122.233, + "args": { + "External id": 254281,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 4424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918529868.720, "dur": 14.276, + "args": { + "External id": 254282,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918529885.412, "dur": 64.747, + "args": { + "External id": 254283,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 4426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918529889.091, "dur": 60.764, + "args": { + "External id": 254284,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 4427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918529893.074, "dur": 5.465, + "args": { + "External id": 254285,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918529900.207, "dur": 49.005, + "args": { + "External id": 254286,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 4429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918530120.712, "dur": 1528.695, + "args": { + "External id": 254287,"Sequence number": 2987534, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 4430 + } + }, + { + "ph": "f", "id": 210, "pid": 4183438, "tid": 31367, "ts": 667918530120.712, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918530251.377, "dur": 105.172, + "args": { + "External id": 254288,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [2816, 1], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 4431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 4183438, "tid": 31367, + "ts": 667918530393.338, "dur": 38.434, + "args": { + "External id": 254289,"kernel_hash": "c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/4p/c4po4gnedw4lkxajnn6p3wyqqi24sdacil3q3sdl47drs4et3op3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [65536, 2816], [16, 4096, 2816], [16, 4096, 2816], [16, 4096, 2816], []], "Ev Idx": 4432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 4183438, "tid": 31367, + "ts": 667918530448.332, "dur": 41.967, + "args": { + "External id": 254290,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 1024], [0, 2816, 1], [2883584, 2816, 1]], "Input Dims": [[1, 1024, 65536], [1, 65536, 2816], [1, 1024, 2816]], "Ev Idx": 4433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918530499.468, "dur": 26.715, + "args": { + "External id": 254291,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 4434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918530533.056, "dur": 33.116, + "args": { + "External id": 254292,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 4435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918530573.417, "dur": 20.693, + "args": { + "External id": 254293,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2816], [1024, 1], [1024, 1]], "Input Dims": [[2816, 65536], [65536, 1024], [2816, 1024]], "Ev Idx": 4436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918530602.464, "dur": 29.881, + "args": { + "External id": 254294,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 4437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 4183438, "tid": 31367, + "ts": 667918530691.070, "dur": 25.709, + "args": { + "External id": 254295,"kernel_hash": "cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/oh/cohoku7zuxcsdfkchrrz4kspw7usafsc5d3yc5qjdd22veuugeck.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 4438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 4183438, "tid": 31367, + "ts": 667918530736.335, "dur": 27.512, + "args": { + "External id": 254296,"kernel_hash": "clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lm/clm7bfnx7pnl5oqs77dih4ccwole4f5pdm4hil7p5ob7aogwgcwv.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918530786.952, "dur": 17.015, + "args": { + "External id": 254297,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 4440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918530816.058, "dur": 14.960, + "args": { + "External id": 254298,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 4441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918530839.365, "dur": 43.500, + "args": { + "External id": 254299,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 4442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918530886.432, "dur": 30.345, + "args": { + "External id": 254300,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 4183438, "tid": 31367, + "ts": 667918530948.375, "dur": 163.625, + "args": { + "External id": 254301,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [65536, 4096, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 16, 4096], [16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918531021.453, "dur": 6.646, + "args": { + "External id": 254302,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918531029.955, "dur": 2.983, + "args": { + "External id": 254303,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918531145.035, "dur": 22.178, + "args": { + "External id": 254304,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 31367, + "ts": 667918531181.965, "dur": 32.948, + "args": { + "External id": 254305,"kernel_hash": "cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/xy/cxy3rbhc5coe5hycpj4oiqwzfsszxnphpxg4hr2bzokiu5chquvc.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918531226.523, "dur": 50.511, + "args": { + "External id": 254306,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 4449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918531283.930, "dur": 34.258, + "args": { + "External id": 254307,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918531324.434, "dur": 27.547, + "args": { + "External id": 254308,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 4451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918531357.623, "dur": 28.734, + "args": { + "External id": 254309,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918531392.538, "dur": 29.956, + "args": { + "External id": 254310,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [1024, 1], [1024, 1]], "Input Dims": [[1024, 65536], [65536, 1024], [1024, 1024]], "Ev Idx": 4453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 31367, + "ts": 667918531430.676, "dur": 27.904, + "args": { + "External id": 254311,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 4183438, "tid": 31367, + "ts": 667918531479.075, "dur": 20.726, + "args": { + "External id": 254312,"kernel_hash": "cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/xq/cxquaruus47mvrquq3gfsj3is6jdepxek5jt3bsov27xnlnlypmz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [65536, 1024], []], "Ev Idx": 4455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 4183438, "tid": 31367, + "ts": 667918531517.822, "dur": 22.433, + "args": { + "External id": 254313,"kernel_hash": "chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "1024", "1", "497", "132", "True", "1024", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/hl/chlkjt3hicxy6k2bpahxa6z5u7fh2potkgg72keqyi6pjjgwpucm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [1024], [65536, 1024], [65536, 1024], [132, 1024], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 4183438, "tid": 31367, + "ts": 667918531554.217, "dur": 15.723, + "args": { + "External id": 254314,"kernel_hash": "cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyztcfbseh2txumvnyll7ahf6t5d5tqi6dmacahyih75kq7cgxcr.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [2048, 1, 1024], [], []], "Input Dims": [[132, 1024], [1, 1024, 2], [], []], "Ev Idx": 4457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 4183438, "tid": 31367, + "ts": 667918531583.063, "dur": 12.974, + "args": { + "External id": 254315,"kernel_hash": "cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y", "grid": "grid(1024,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "1024", "2"], "kernel_file": "/tmp/torchinductor_cvm/th/cthfckwcw2pzhsppv2sbhauqwtg23csgbsalqrbuub6hj6pj4p7y.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1, 1024], [1024, 1], [], []], "Input Dims": [[1, 1024, 2], [1, 1024], [], []], "Ev Idx": 4458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 4183438, "tid": 31367, + "ts": 667918531606.771, "dur": 15.320, + "args": { + "External id": 254316,"kernel_hash": "csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqhbcpoi4yxxvcel4c4wfiuwwaxltg3nqs73dgwqy7o36qde5hr.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], [16, 4096, 1024], []], "Ev Idx": 4459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531731.006, "dur": 15.941, + "args": { + "External id": 254317,"Record function id": 0, "Ev Idx": 4460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531734.581, "dur": 11.459, + "args": { + "External id": 254318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918531739.110, "dur": 6.034, + "args": { + "External id": 254319,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918531740.510, "dur": 4.538, + "args": { + "External id": 254320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531751.005, "dur": 5.810, + "args": { + "External id": 254321,"Record function id": 0, "Ev Idx": 4464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531752.668, "dur": 3.672, + "args": { + "External id": 254322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918531753.735, "dur": 2.064, + "args": { + "External id": 254323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918531754.400, "dur": 1.327, + "args": { + "External id": 254324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531760.106, "dur": 5.087, + "args": { + "External id": 254325,"Record function id": 0, "Ev Idx": 4468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531762.075, "dur": 2.700, + "args": { + "External id": 254326,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918531762.988, "dur": 1.371, + "args": { + "External id": 254327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918531763.487, "dur": 0.760, + "args": { + "External id": 254328,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1024]], "Ev Idx": 4471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531768.322, "dur": 5.608, + "args": { + "External id": 254329,"Record function id": 0, "Ev Idx": 4472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531770.439, "dur": 3.094, + "args": { + "External id": 254330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 4473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918531771.291, "dur": 1.800, + "args": { + "External id": 254331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 4474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918531771.747, "dur": 1.237, + "args": { + "External id": 254332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 4475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531777.034, "dur": 4.805, + "args": { + "External id": 254333,"Record function id": 0, "Ev Idx": 4476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531778.691, "dur": 2.710, + "args": { + "External id": 254334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918531779.394, "dur": 1.596, + "args": { + "External id": 254335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918531779.778, "dur": 1.107, + "args": { + "External id": 254336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531784.904, "dur": 4.040, + "args": { + "External id": 254337,"Record function id": 0, "Ev Idx": 4480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531786.170, "dur": 2.334, + "args": { + "External id": 254338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918531786.761, "dur": 1.311, + "args": { + "External id": 254339,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918531787.075, "dur": 0.893, + "args": { + "External id": 254340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531791.999, "dur": 4.258, + "args": { + "External id": 254341,"Record function id": 0, "Ev Idx": 4484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531793.439, "dur": 2.429, + "args": { + "External id": 254342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918531793.912, "dur": 1.513, + "args": { + "External id": 254343,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918531794.608, "dur": 0.717, + "args": { + "External id": 254344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531799.412, "dur": 4.406, + "args": { + "External id": 254345,"Record function id": 0, "Ev Idx": 4488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531801.114, "dur": 2.309, + "args": { + "External id": 254346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918531801.890, "dur": 1.115, + "args": { + "External id": 254347,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918531802.233, "dur": 0.671, + "args": { + "External id": 254348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 4491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531807.168, "dur": 3.540, + "args": { + "External id": 254349,"Record function id": 0, "Ev Idx": 4492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918531808.164, "dur": 2.150, + "args": { + "External id": 254350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918531808.696, "dur": 1.223, + "args": { + "External id": 254351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918531808.964, "dur": 0.851, + "args": { + "External id": 254352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 4495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918531814.716, "dur": 15832.429, + "args": { + "External id": 254353,"Record function id": 0, "Sequence number": 2987533, "Fwd thread id": 1, "Ev Idx": 4496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918531815.952, "dur": 15822.738, + "args": { + "External id": 254354,"Sequence number": 2987533, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 4497 + } + }, + { + "ph": "f", "id": 211, "pid": 4183438, "tid": 31367, "ts": 667918531815.952, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 4183438, "tid": 31367, + "ts": 667918531846.923, "dur": 41.153, + "args": { + "External id": 254355,"Record function id": 0, "Ev Idx": 4498 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 4183438, "tid": 31367, + "ts": 667918531895.245, "dur": 70.257, + "args": { + "External id": 254356,"Record function id": 0, "Ev Idx": 4499 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 4183438, "tid": 31367, + "ts": 667918531971.251, "dur": 15659.874, + "args": { + "External id": 254357,"Record function id": 0, "Ev Idx": 4500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918532076.913, "dur": 9.068, + "args": { + "External id": 254358,"Record function id": 0, "Concrete Inputs": ["[12847104]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918532096.143, "dur": 4.902, + "args": { + "External id": 254359,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 4502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918532114.524, "dur": 14829.468, + "args": { + "External id": 254360,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 4503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918532131.040, "dur": 14804.570, + "args": { + "External id": 254361,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [1024, 1], [1024, 1], [1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], [], [], [1605888, 1]], "Input Dims": [[[1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], [], [], [8, 1605888]], "Ev Idx": 4504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918532150.319, "dur": 14.356, + "args": { + "External id": 254362,"Record function id": 0, "Concrete Inputs": ["[3194]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918532168.722, "dur": 14728.539, + "args": { + "External id": 254363,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], [], []], "Ev Idx": 4506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918532171.481, "dur": 14724.987, + "args": { + "External id": 254364,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3194], [], [], [], [], [], []], "Ev Idx": 4507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918532175.911, "dur": 8.709, + "args": { + "External id": 254365,"Record function id": 0, "Concrete Inputs": ["[3194]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918532186.659, "dur": 14706.190, + "args": { + "External id": 254366,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3194], [3194], []], "Ev Idx": 4509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918547035.996, "dur": 10.101, + "args": { + "External id": 254367,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[12847104], [], [], [], [], []], "Ev Idx": 4510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918547039.277, "dur": 6.410, + "args": { + "External id": 254368,"Record function id": 0, "Concrete Inputs": ["[1605888]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918547074.229, "dur": 277.983, + "args": { + "External id": 254369,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[1605888], [12847104], [], [], [], []], "Ev Idx": 4512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918547100.818, "dur": 246.522, + "args": { + "External id": 254370,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1605888, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[12847104], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4513, "In msg nelems": 12847104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918547112.514, "dur": 229.411, + "args": { + "External id": 254371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[12847104]], "Ev Idx": 4514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918547370.306, "dur": 2.481, + "args": { + "External id": 254372,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4515, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918547426.678, "dur": 7.153, + "args": { + "External id": 254373,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918547477.848, "dur": 1.697, + "args": { + "External id": 254374,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918547496.281, "dur": 1.234, + "args": { + "External id": 254375,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "131200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918547509.292, "dur": 1.133, + "args": { + "External id": 254376,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "262272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918547522.133, "dur": 1.247, + "args": { + "External id": 254377,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "393344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918547533.866, "dur": 0.935, + "args": { + "External id": 254378,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918547545.942, "dur": 0.884, + "args": { + "External id": 254379,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918547557.707, "dur": 1.049, + "args": { + "External id": 254380,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "884992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918547569.289, "dur": 0.881, + "args": { + "External id": 254381,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "1245440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918547697.835, "dur": 276.821, + "args": { + "External id": 254382,"Record function id": 0, "Sequence number": 2987532, "Fwd thread id": 1, "Ev Idx": 4525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 4183438, "tid": 31367, + "ts": 667918547701.021, "dur": 265.556, + "args": { + "External id": 254383,"Sequence number": 2987532, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 4526 + } + }, + { + "ph": "f", "id": 212, "pid": 4183438, "tid": 31367, "ts": 667918547701.021, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 4183438, "tid": 31367, + "ts": 667918547820.718, "dur": 46.713, + "args": { + "External id": 254384,"kernel_hash": "c62a63oyn7avzuzdtrtz4qdvhfnops7fz2f53gxtomcyzc7spley", "grid": "grid(32768000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "32768000"], "kernel_file": "/tmp/torchinductor_cvm/62/c62a63oyn7avzuzdtrtz4qdvhfnops7fz2f53gxtomcyzc7spley.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[1024, 1], []], "Input Dims": [[32000, 1024], []], "Ev Idx": 4527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 4183438, "tid": 31367, + "ts": 667918547882.010, "dur": 28.591, + "args": { + "External id": 254385,"kernel_hash": "cerfxu7bhbn6ulmm7kpiu2toaha32yw6zzr2afv5vcf2lszoj3v4", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/er/cerfxu7bhbn6ulmm7kpiu2toaha32yw6zzr2afv5vcf2lszoj3v4.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096], [16, 4096, 1024], [32000, 1024], []], "Ev Idx": 4528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 4183438, "tid": 31367, + "ts": 667918547928.336, "dur": 21.761, + "args": { + "External id": 254386,"kernel_hash": "cny6en3gok6pbitjqamstlopano7gsqxbj5xiwxezt7bsp54rmil", "grid": "grid(32768000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "32768000"], "kernel_file": "/tmp/torchinductor_cvm/ny/cny6en3gok6pbitjqamstlopano7gsqxbj5xiwxezt7bsp54rmil.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 4529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918547984.382, "dur": 15.854, + "args": { + "External id": 254387,"Record function id": 0, "Ev Idx": 4530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 4183438, "tid": 31367, + "ts": 667918547987.750, "dur": 11.523, + "args": { + "External id": 254388,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 4531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 31367, + "ts": 667918547992.162, "dur": 6.347, + "args": { + "External id": 254389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 4532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 31367, + "ts": 667918547993.814, "dur": 4.575, + "args": { + "External id": 254390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 4533 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 4183438, "tid": 31367, + "ts": 667918548023.145, "dur": 7089.397, + "args": { + "External id": 254391,"Record function id": 0, "Ev Idx": 4534 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 4183438, "tid": 31367, + "ts": 667918548041.596, "dur": 38.868, + "args": { + "External id": 254392,"Record function id": 0, "Ev Idx": 4535 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 4183438, "tid": 31367, + "ts": 667918548086.989, "dur": 265.888, + "args": { + "External id": 254393,"Record function id": 0, "Ev Idx": 4536 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 4183438, "tid": 31367, + "ts": 667918548361.356, "dur": 6549.838, + "args": { + "External id": 254394,"Record function id": 0, "Ev Idx": 4537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918548478.265, "dur": 7.587, + "args": { + "External id": 254395,"Record function id": 0, "Concrete Inputs": ["[116925440]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 31367, + "ts": 667918548495.928, "dur": 5.367, + "args": { + "External id": 254396,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[116925440], []], "Ev Idx": 4539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918548520.493, "dur": 5078.892, + "args": { + "External id": 254397,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [14615680, 1]], "Input Dims": [[], [], [], [8, 14615680]], "Ev Idx": 4540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 4183438, "tid": 31367, + "ts": 667918548538.960, "dur": 5048.150, + "args": { + "External id": 254398,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [14615680, 1]], "Input Dims": [[], [], [], [8, 14615680]], "Ev Idx": 4541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918548643.412, "dur": 50.194, + "args": { + "External id": 254399,"Record function id": 0, "Concrete Inputs": ["[28789]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 31367, + "ts": 667918548724.161, "dur": 4824.919, + "args": { + "External id": 254400,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[28789], [], [], [], [], [], [], []], "Ev Idx": 4543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 31367, + "ts": 667918548726.817, "dur": 4821.656, + "args": { + "External id": 254401,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[28789], [], [], [], [], [], []], "Ev Idx": 4544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 31367, + "ts": 667918548731.136, "dur": 7.757, + "args": { + "External id": 254402,"Record function id": 0, "Concrete Inputs": ["[28789]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 31367, + "ts": 667918548740.559, "dur": 4804.189, + "args": { + "External id": 254403,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[28789], [28789], []], "Ev Idx": 4546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 31367, + "ts": 667918553748.691, "dur": 9.324, + "args": { + "External id": 254404,"Record function id": 0, "Concrete Inputs": ["", "[14615680]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[116925440], [], [], [], [], []], "Ev Idx": 4547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 31367, + "ts": 667918553751.666, "dur": 5.816, + "args": { + "External id": 254405,"Record function id": 0, "Concrete Inputs": ["[14615680]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 4183438, "tid": 31367, + "ts": 667918553784.673, "dur": 451.115, + "args": { + "External id": 254406,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[14615680], [116925440], [], [], [], []], "Ev Idx": 4549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918553809.899, "dur": 420.685, + "args": { + "External id": 254407,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 14615680, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[116925440], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4550, "In msg nelems": 116925440 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 4183438, "tid": 31367, + "ts": 667918553820.755, "dur": 402.859, + "args": { + "External id": 254408,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[116925440]], "Ev Idx": 4551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 31367, + "ts": 667918554263.172, "dur": 2.570, + "args": { + "External id": 254409,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4552, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554325.997, "dur": 6.756, + "args": { + "External id": 254410,"Record function id": 0, "Concrete Inputs": ["", "[4000, 1024]", "[1024, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554376.282, "dur": 1.906, + "args": { + "External id": 254411,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "4096000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554393.601, "dur": 1.631, + "args": { + "External id": 254412,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "4096128"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554407.865, "dur": 1.088, + "args": { + "External id": 254413,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "4227200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554420.563, "dur": 1.234, + "args": { + "External id": 254414,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "4358272"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554433.462, "dur": 1.006, + "args": { + "External id": 254415,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "4489344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554445.029, "dur": 1.031, + "args": { + "External id": 254416,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "4620416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554456.139, "dur": 1.623, + "args": { + "External id": 254417,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "4620544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554468.418, "dur": 1.426, + "args": { + "External id": 254418,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "4980992"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554479.574, "dur": 1.478, + "args": { + "External id": 254419,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "5341440"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554491.270, "dur": 1.463, + "args": { + "External id": 254420,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "5701888"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554503.204, "dur": 1.492, + "args": { + "External id": 254421,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "5702016"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554514.008, "dur": 1.250, + "args": { + "External id": 254422,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "5833088"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554525.190, "dur": 1.122, + "args": { + "External id": 254423,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "5964160"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554535.523, "dur": 1.161, + "args": { + "External id": 254424,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "6095232"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554546.070, "dur": 1.112, + "args": { + "External id": 254425,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "6226304"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554557.220, "dur": 1.366, + "args": { + "External id": 254426,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "6226432"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554569.040, "dur": 1.236, + "args": { + "External id": 254427,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "6586880"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554580.117, "dur": 1.454, + "args": { + "External id": 254428,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "6947328"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554592.141, "dur": 1.515, + "args": { + "External id": 254429,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "7307776"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554603.630, "dur": 1.564, + "args": { + "External id": 254430,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "7307904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554616.115, "dur": 1.118, + "args": { + "External id": 254431,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "7438976"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554627.017, "dur": 1.554, + "args": { + "External id": 254432,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "7570048"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554639.092, "dur": 1.283, + "args": { + "External id": 254433,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "7701120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554650.024, "dur": 34.295, + "args": { + "External id": 254434,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "7832192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554699.955, "dur": 2.051, + "args": { + "External id": 254435,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "7832320"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554712.085, "dur": 1.367, + "args": { + "External id": 254436,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "8192768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554723.416, "dur": 1.227, + "args": { + "External id": 254437,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "8553216"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554734.454, "dur": 1.553, + "args": { + "External id": 254438,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "8913664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554746.051, "dur": 1.284, + "args": { + "External id": 254439,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "8913792"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554756.941, "dur": 1.211, + "args": { + "External id": 254440,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "9044864"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554768.313, "dur": 1.515, + "args": { + "External id": 254441,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "9175936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554779.379, "dur": 1.408, + "args": { + "External id": 254442,"Record function id": 0, "Concrete Inputs": ["", "[128, 1024]", "[1024, 1]", "9307008"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554790.710, "dur": 1.548, + "args": { + "External id": 254443,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "9438080"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554802.142, "dur": 1.664, + "args": { + "External id": 254444,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "9438208"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554813.486, "dur": 1.601, + "args": { + "External id": 254445,"Record function id": 0, "Concrete Inputs": ["", "[352, 1024]", "[1024, 1]", "9798656"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554825.086, "dur": 1.529, + "args": { + "External id": 254446,"Record function id": 0, "Concrete Inputs": ["", "[128, 2816]", "[2816, 1]", "10159104"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554836.278, "dur": 1.400, + "args": { + "External id": 254447,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "10519552"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 31367, + "ts": 667918554847.305, "dur": 1.711, + "args": { + "External id": 254448,"Record function id": 0, "Concrete Inputs": ["", "[4000, 1024]", "[1024, 1]", "10519680"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4591 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "ProfilerStep#13311", "pid": 4183438, "tid": 4183438, + "ts": 667917756283.920, "dur": 812476.769, + "args": { + "External id": 245761,"Record function id": 0, "Ev Idx": 4592 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.zero_grad#AdamW.zero_grad", "pid": 4183438, "tid": 4183438, + "ts": 667917756314.804, "dur": 420.137, + "args": { + "External id": 245762,"Record function id": 0, "Ev Idx": 4593 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 4183438, "tid": 4183438, + "ts": 667917756772.906, "dur": 2165.360, + "args": { + "External id": 245763,"Record function id": 0, "Ev Idx": 4594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917757734.279, "dur": 7.813, + "args": { + "External id": 245764,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 4183438, "tid": 4183438, + "ts": 667917757763.877, "dur": 6.444, + "args": { + "External id": 245765,"Sequence number": 2987532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 4596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917758177.811, "dur": 2.257, + "args": { + "External id": 245766,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 4183438, "tid": 4183438, + "ts": 667917758187.908, "dur": 2.407, + "args": { + "External id": 245767,"Sequence number": 2987532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 4598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917758819.310, "dur": 2.633, + "args": { + "External id": 245768,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 4183438, "tid": 4183438, + "ts": 667917758829.221, "dur": 2.343, + "args": { + "External id": 245769,"Sequence number": 2987532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 4600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917759491.826, "dur": 14.014, + "args": { + "External id": 245770,"Sequence number": 2987532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 4601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917759499.493, "dur": 2.348, + "args": { + "External id": 245771,"Record function id": 0, "Concrete Inputs": ["", "[16, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 4602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917759507.366, "dur": 4.489, + "args": { + "External id": 245772,"Sequence number": 2987532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 4603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917759509.683, "dur": 1.136, + "args": { + "External id": 245773,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 4604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917759537.305, "dur": 551.034, + "args": { + "External id": 245774,"Sequence number": 2987532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], [], []], "Ev Idx": 4605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917759544.776, "dur": 542.635, + "args": { + "External id": 245775,"Sequence number": 2987532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], []], "Ev Idx": 4606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917759553.885, "dur": 10.372, + "args": { + "External id": 245776,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917759566.565, "dur": 518.901, + "args": { + "External id": 245777,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917759576.705, "dur": 0.636, + "args": { + "External id": 245778,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 4609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 4183438, "tid": 4183438, + "ts": 667917759579.901, "dur": 8.329, + "args": { + "External id": 245779,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[16, 4096], [16, 4096]], "Ev Idx": 4610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 4183438, "tid": 4183438, + "ts": 667917759584.580, "dur": 3.473, + "args": { + "External id": 245780,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[16, 4096], [], []], "Ev Idx": 4611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917759587.194, "dur": 0.593, + "args": { + "External id": 245781,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 4612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 4183438, "tid": 4183438, + "ts": 667917759590.605, "dur": 223.647, + "args": { + "External id": 245782,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 4613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 4183438, + "ts": 667917759592.701, "dur": 221.194, + "args": { + "External id": 245783,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 4614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917759594.897, "dur": 35.052, + "args": { + "External id": 245784,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 4615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917759596.980, "dur": 32.294, + "args": { + "External id": 245785,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917759630.918, "dur": 181.627, + "args": { + "External id": 245786,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917759817.851, "dur": 263.830, + "args": { + "External id": 245787,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917760109.107, "dur": 539.684, + "args": { + "External id": 245788,"Sequence number": 2987532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], [], []], "Ev Idx": 4619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917760110.927, "dur": 537.373, + "args": { + "External id": 245789,"Sequence number": 2987532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], []], "Ev Idx": 4620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917760118.747, "dur": 8.644, + "args": { + "External id": 245790,"Record function id": 0, "Concrete Inputs": ["[16, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917760128.493, "dur": 515.647, + "args": { + "External id": 245791,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[16, 8192], [16, 8192], []], "Ev Idx": 4622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 4183438, "tid": 4183438, + "ts": 667917760714.525, "dur": 59.292, + "args": { + "External id": 245792,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917760720.799, "dur": 6.761, + "args": { + "External id": 245793,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 4183438, "tid": 4183438, + "ts": 667917760730.511, "dur": 42.930, + "args": { + "External id": 245794,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 4625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667917760735.938, "dur": 6.817, + "args": { + "External id": 245795,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 4626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 4183438, "tid": 4183438, + "ts": 667917760787.457, "dur": 73.971, + "args": { + "External id": 245796,"Sequence number": 2987532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 4183438, "tid": 4183438, + "ts": 667917760791.890, "dur": 7.570, + "args": { + "External id": 245797,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 4628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917760797.444, "dur": 1.751, + "args": { + "External id": 245798,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 4629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917760800.646, "dur": 4.205, + "args": { + "External id": 245799,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 4183438, + "ts": 667917760809.165, "dur": 3.784, + "args": { + "External id": 245800,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[16, 4096]], "Ev Idx": 4631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 4183438, "tid": 4183438, + "ts": 667917760815.594, "dur": 5.518, + "args": { + "External id": 245801,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 4632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917760820.418, "dur": 0.566, + "args": { + "External id": 245802,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 4633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 4183438, "tid": 4183438, + "ts": 667917760822.172, "dur": 4.666, + "args": { + "External id": 245803,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[16, 4096, 1], [], [], []], "Ev Idx": 4634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917760825.904, "dur": 0.837, + "args": { + "External id": 245804,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[16, 4096, 1], [], [], []], "Ev Idx": 4635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 4183438, "tid": 4183438, + "ts": 667917760828.774, "dur": 4.344, + "args": { + "External id": 245805,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [16, 1, 1, 4096]], "Ev Idx": 4636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 4183438, "tid": 4183438, + "ts": 667917760830.487, "dur": 2.532, + "args": { + "External id": 245806,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 4637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917760832.217, "dur": 0.687, + "args": { + "External id": 245807,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 4638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917760834.094, "dur": 26.567, + "args": { + "External id": 245808,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[16, 1, 1, 4096], [16, 1, 1, 4096], []], "Ev Idx": 4639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917760869.623, "dur": 34.530, + "args": { + "External id": 245809,"Sequence number": 2987532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 4640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917760871.375, "dur": 32.522, + "args": { + "External id": 245810,"Sequence number": 2987532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], []], "Ev Idx": 4641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917760879.409, "dur": 3.142, + "args": { + "External id": 245811,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917760883.845, "dur": 19.536, + "args": { + "External id": 245812,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4643 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917761008.518, "dur": 147.096, + "args": { + "External id": 245813,"Record function id": 0, "Ev Idx": 4644 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 4183438, "tid": 4183438, + "ts": 667917761088.619, "dur": 56.827, + "args": { + "External id": 245814,"Record function id": 0, "Ev Idx": 4645 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917761162.165, "dur": 62.541, + "args": { + "External id": 245815,"Record function id": 0, "Ev Idx": 4646 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917761235.057, "dur": 7805.287, + "args": { + "External id": 245816,"Record function id": 0, "Ev Idx": 4647 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 4183438, "tid": 4183438, + "ts": 667917761245.717, "dur": 1263.362, + "args": { + "External id": 245817,"Record function id": 0, "Ev Idx": 4648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917761352.616, "dur": 7.927, + "args": { + "External id": 245818,"Record function id": 0, "Concrete Inputs": ["[14615680]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917761378.764, "dur": 134.614, + "args": { + "External id": 245819,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[14615680], [], []], "Ev Idx": 4650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761383.685, "dur": 1.791, + "args": { + "External id": 245820,"Record function id": 0, "Concrete Inputs": ["", "[4096000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761392.400, "dur": 0.566, + "args": { + "External id": 245821,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "4096000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761393.900, "dur": 0.407, + "args": { + "External id": 245822,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "4096128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761395.045, "dur": 2.014, + "args": { + "External id": 245823,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "4227200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761401.516, "dur": 0.416, + "args": { + "External id": 245824,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "4358272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761402.769, "dur": 0.174, + "args": { + "External id": 245825,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "4489344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761403.813, "dur": 2.566, + "args": { + "External id": 245826,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "4620416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761409.615, "dur": 0.317, + "args": { + "External id": 245827,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4620544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761410.802, "dur": 0.317, + "args": { + "External id": 245828,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4980992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761416.456, "dur": 0.352, + "args": { + "External id": 245829,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "5341440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761417.500, "dur": 0.312, + "args": { + "External id": 245830,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "5701888"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761418.565, "dur": 1.567, + "args": { + "External id": 245831,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "5702016"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761424.987, "dur": 0.159, + "args": { + "External id": 245832,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "5833088"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761425.785, "dur": 0.166, + "args": { + "External id": 245833,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "5964160"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761426.867, "dur": 2.280, + "args": { + "External id": 245834,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "6095232"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761432.030, "dur": 0.301, + "args": { + "External id": 245835,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "6226304"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761432.989, "dur": 0.179, + "args": { + "External id": 245836,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "6226432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761439.109, "dur": 0.392, + "args": { + "External id": 245837,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "6586880"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761440.248, "dur": 0.336, + "args": { + "External id": 245838,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "6947328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761441.397, "dur": 1.509, + "args": { + "External id": 245839,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "7307776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761447.337, "dur": 0.333, + "args": { + "External id": 245840,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "7307904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761448.338, "dur": 0.147, + "args": { + "External id": 245841,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "7438976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761449.508, "dur": 2.396, + "args": { + "External id": 245842,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "7570048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761454.366, "dur": 0.294, + "args": { + "External id": 245843,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "7701120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761455.279, "dur": 0.333, + "args": { + "External id": 245844,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "7832192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761460.554, "dur": 0.286, + "args": { + "External id": 245845,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "7832320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761461.556, "dur": 0.301, + "args": { + "External id": 245846,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "8192768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761462.525, "dur": 1.595, + "args": { + "External id": 245847,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "8553216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761469.188, "dur": 0.300, + "args": { + "External id": 245848,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "8913664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761470.102, "dur": 0.168, + "args": { + "External id": 245849,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "8913792"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761471.136, "dur": 2.572, + "args": { + "External id": 245850,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "9044864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761476.901, "dur": 0.295, + "args": { + "External id": 245851,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "9175936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761477.813, "dur": 0.296, + "args": { + "External id": 245852,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "9307008"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761482.917, "dur": 0.330, + "args": { + "External id": 245853,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "9438080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761484.007, "dur": 0.327, + "args": { + "External id": 245854,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "9438208"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761485.029, "dur": 1.351, + "args": { + "External id": 245855,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "9798656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761491.391, "dur": 0.208, + "args": { + "External id": 245856,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "10159104"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761492.148, "dur": 0.169, + "args": { + "External id": 245857,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "10519552"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761492.960, "dur": 2.518, + "args": { + "External id": 245858,"Record function id": 0, "Concrete Inputs": ["", "[4096000]", "[1]", "10519680"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917761538.923, "dur": 49.383, + "args": { + "External id": 245859,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 4690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917761697.784, "dur": 239.924, + "args": { + "External id": 245860,"Record function id": 0, "Concrete Inputs": ["", "", "14615680", "8", "2", "15", ""], "Input type": ["TensorList", "", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 4691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917761714.224, "dur": 6.014, + "args": { + "External id": 245861,"Record function id": 0, "Concrete Inputs": ["[116925440]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917761727.748, "dur": 14.131, + "args": { + "External id": 245862,"Record function id": 0, "Concrete Inputs": ["", "0", "29231360", "14615680"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[116925440], [], [], []], "Ev Idx": 4693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917761732.151, "dur": 9.272, + "args": { + "External id": 245863,"Record function id": 0, "Concrete Inputs": ["", "0", "29231360", "43847040", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[116925440], [], [], [], []], "Ev Idx": 4694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761737.705, "dur": 0.785, + "args": { + "External id": 245864,"Record function id": 0, "Concrete Inputs": ["", "[14615680]", "[1]", "29231360"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[116925440], [], [], []], "Ev Idx": 4695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917761750.348, "dur": 111.027, + "args": { + "External id": 245865,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[14615680], [], []], "Ev Idx": 4696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761752.637, "dur": 0.521, + "args": { + "External id": 245866,"Record function id": 0, "Concrete Inputs": ["", "[4096000]", "[1]", "29231360"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761754.470, "dur": 0.271, + "args": { + "External id": 245867,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "33327360"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761758.520, "dur": 1.294, + "args": { + "External id": 245868,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "33327488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761760.607, "dur": 0.757, + "args": { + "External id": 245869,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "33458560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761764.287, "dur": 0.419, + "args": { + "External id": 245870,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "33589632"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761765.502, "dur": 0.200, + "args": { + "External id": 245871,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "33720704"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761766.549, "dur": 0.340, + "args": { + "External id": 245872,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "33851776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761769.653, "dur": 0.164, + "args": { + "External id": 245873,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "33851904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761770.533, "dur": 0.561, + "args": { + "External id": 245874,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "34212352"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761771.986, "dur": 0.242, + "args": { + "External id": 245875,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "34572800"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761775.088, "dur": 1.369, + "args": { + "External id": 245876,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "34933248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761777.457, "dur": 0.362, + "args": { + "External id": 245877,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "34933376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761780.530, "dur": 2.337, + "args": { + "External id": 245878,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "35064448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761783.700, "dur": 0.329, + "args": { + "External id": 245879,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "35195520"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761784.827, "dur": 0.327, + "args": { + "External id": 245880,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "35326592"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761789.131, "dur": 0.167, + "args": { + "External id": 245881,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "35457664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761789.960, "dur": 0.311, + "args": { + "External id": 245882,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "35457792"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761790.879, "dur": 0.276, + "args": { + "External id": 245883,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "35818240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761796.752, "dur": 1.305, + "args": { + "External id": 245884,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "36178688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761798.822, "dur": 0.164, + "args": { + "External id": 245885,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "36539136"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761801.591, "dur": 2.588, + "args": { + "External id": 245886,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "36539264"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761804.826, "dur": 0.209, + "args": { + "External id": 245887,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "36670336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761805.711, "dur": 0.324, + "args": { + "External id": 245888,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "36801408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761811.426, "dur": 0.173, + "args": { + "External id": 245889,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "36932480"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761812.217, "dur": 0.177, + "args": { + "External id": 245890,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "37063552"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761813.124, "dur": 0.161, + "args": { + "External id": 245891,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "37063680"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761817.982, "dur": 1.093, + "args": { + "External id": 245892,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "37424128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761819.858, "dur": 0.315, + "args": { + "External id": 245893,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "37784576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761822.795, "dur": 2.409, + "args": { + "External id": 245894,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "38145024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761825.853, "dur": 0.188, + "args": { + "External id": 245895,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "38145152"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761826.695, "dur": 0.180, + "args": { + "External id": 245896,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "38276224"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761831.844, "dur": 0.169, + "args": { + "External id": 245897,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "38407296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761832.673, "dur": 0.159, + "args": { + "External id": 245898,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "38538368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761833.472, "dur": 0.165, + "args": { + "External id": 245899,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "38669440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761838.944, "dur": 1.295, + "args": { + "External id": 245900,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "38669568"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761841.029, "dur": 0.141, + "args": { + "External id": 245901,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "39030016"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761843.592, "dur": 2.468, + "args": { + "External id": 245902,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "39390464"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761846.721, "dur": 0.179, + "args": { + "External id": 245903,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "39750912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917761847.355, "dur": 0.303, + "args": { + "External id": 245904,"Record function id": 0, "Concrete Inputs": ["", "[4096000]", "[1]", "39751040"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[14615680], [], [], []], "Ev Idx": 4735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917761884.509, "dur": 36.869, + "args": { + "External id": 245905,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 4736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917761993.961, "dur": 399.151, + "args": { + "External id": 245906,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[116925440], [14615680], [], [], []], "Ev Idx": 4737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917762028.887, "dur": 358.578, + "args": { + "External id": 245907,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 116925440, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[14615680], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4738, "In msg nelems": 14615680 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917762039.079, "dur": 341.772, + "args": { + "External id": 245908,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[14615680]], "Ev Idx": 4739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917762421.366, "dur": 2.190, + "args": { + "External id": 245909,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4740, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 4183438, "tid": 4183438, + "ts": 667917762526.238, "dur": 6337.843, + "args": { + "External id": 245910,"Record function id": 0, "Ev Idx": 4741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762743.784, "dur": 7.769, + "args": { + "External id": 245911,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[116925440], []], "Ev Idx": 4742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762755.795, "dur": 1.337, + "args": { + "External id": 245912,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[32768000], []], "Ev Idx": 4743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762759.277, "dur": 0.674, + "args": { + "External id": 245913,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762761.916, "dur": 1.873, + "args": { + "External id": 245914,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762766.943, "dur": 0.658, + "args": { + "External id": 245915,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762769.175, "dur": 0.922, + "args": { + "External id": 245916,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762771.842, "dur": 0.793, + "args": { + "External id": 245917,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762774.157, "dur": 2.054, + "args": { + "External id": 245918,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762782.660, "dur": 0.603, + "args": { + "External id": 245919,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762784.654, "dur": 0.471, + "args": { + "External id": 245920,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762786.624, "dur": 0.806, + "args": { + "External id": 245921,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762788.824, "dur": 1.547, + "args": { + "External id": 245922,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762793.799, "dur": 0.435, + "args": { + "External id": 245923,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762795.610, "dur": 0.724, + "args": { + "External id": 245924,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762797.714, "dur": 0.951, + "args": { + "External id": 245925,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762799.894, "dur": 1.731, + "args": { + "External id": 245926,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762807.044, "dur": 0.742, + "args": { + "External id": 245927,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762809.100, "dur": 0.577, + "args": { + "External id": 245928,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762811.402, "dur": 0.530, + "args": { + "External id": 245929,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762813.252, "dur": 1.967, + "args": { + "External id": 245930,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762818.517, "dur": 0.442, + "args": { + "External id": 245931,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762820.216, "dur": 0.588, + "args": { + "External id": 245932,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762822.268, "dur": 0.698, + "args": { + "External id": 245933,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762824.194, "dur": 1.580, + "args": { + "External id": 245934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762844.230, "dur": 0.523, + "args": { + "External id": 245935,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762846.310, "dur": 0.520, + "args": { + "External id": 245936,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762848.463, "dur": 0.920, + "args": { + "External id": 245937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762850.877, "dur": 1.895, + "args": { + "External id": 245938,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762856.236, "dur": 0.512, + "args": { + "External id": 245939,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762858.219, "dur": 0.419, + "args": { + "External id": 245940,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762860.056, "dur": 0.501, + "args": { + "External id": 245941,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762861.688, "dur": 1.619, + "args": { + "External id": 245942,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762868.982, "dur": 0.560, + "args": { + "External id": 245943,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762870.950, "dur": 0.556, + "args": { + "External id": 245944,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762872.914, "dur": 0.980, + "args": { + "External id": 245945,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762875.386, "dur": 1.422, + "args": { + "External id": 245946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762879.974, "dur": 0.622, + "args": { + "External id": 245947,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762881.916, "dur": 0.634, + "args": { + "External id": 245948,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762884.074, "dur": 0.576, + "args": { + "External id": 245949,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917762885.970, "dur": 1.849, + "args": { + "External id": 245950,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[32768000], []], "Ev Idx": 4781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917762918.864, "dur": 5891.285, + "args": { + "External id": 245951,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[14615680, 1], [], [], []], "Input Dims": [[8, 14615680], [], [], []], "Ev Idx": 4782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917762944.117, "dur": 5856.185, + "args": { + "External id": 245952,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[14615680, 1], [], [], []], "Input Dims": [[8, 14615680], [], [], []], "Ev Idx": 4783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917762968.825, "dur": 5.550, + "args": { + "External id": 245953,"Record function id": 0, "Concrete Inputs": ["[3034]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917762978.876, "dur": 5780.923, + "args": { + "External id": 245954,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3034], [], [], [], [], [], [], []], "Ev Idx": 4785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917762981.387, "dur": 5777.536, + "args": { + "External id": 245955,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3034], [], [], [], [], [], []], "Ev Idx": 4786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917762989.747, "dur": 5.679, + "args": { + "External id": 245956,"Record function id": 0, "Concrete Inputs": ["[3034]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917762997.248, "dur": 5758.416, + "args": { + "External id": 245957,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3034], [3034], []], "Ev Idx": 4788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917769120.721, "dur": 40.639, + "args": { + "External id": 245958,"Record function id": 0, "Ev Idx": 4789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 4183438, "tid": 4183438, + "ts": 667917769162.578, "dur": 228.195, + "args": { + "External id": 245959,"Record function id": 0, "Ev Idx": 4790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917769219.412, "dur": 161.817, + "args": { + "External id": 245960,"Sequence number": 2987532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32000, 1024], [16, 4096]], "Ev Idx": 4791 + } + }, + { + "ph": "s", "id": 212, "pid": 4183438, "tid": 4183438, "ts": 667917769219.412, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 4183438, "tid": 4183438, + "ts": 667917769294.128, "dur": 49.951, + "args": { + "External id": 245961,"kernel_hash": "crydi4fszmveuo3xtgmvd5wzmvtv7cilsx6l2c4ussrew7shlvlg", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/ry/crydi4fszmveuo3xtgmvd5wzmvtv7cilsx6l2c4ussrew7shlvlg.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096], [32000, 1024], [16, 4096, 1024], []], "Ev Idx": 4792 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917769455.036, "dur": 55.451, + "args": { + "External id": 245962,"Record function id": 0, "Ev Idx": 4793 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 4183438, "tid": 4183438, + "ts": 667917769520.290, "dur": 6593.065, + "args": { + "External id": 245963,"Record function id": 0, "Ev Idx": 4794 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 4183438, "tid": 4183438, + "ts": 667917769528.719, "dur": 885.723, + "args": { + "External id": 245964,"Record function id": 0, "Ev Idx": 4795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917769597.491, "dur": 11.633, + "args": { + "External id": 245965,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917769621.450, "dur": 78.307, + "args": { + "External id": 245966,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 4797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769630.312, "dur": 2.322, + "args": { + "External id": 245967,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769634.240, "dur": 0.380, + "args": { + "External id": 245968,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769635.347, "dur": 2.392, + "args": { + "External id": 245969,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769641.307, "dur": 0.327, + "args": { + "External id": 245970,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769642.272, "dur": 0.274, + "args": { + "External id": 245971,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769645.699, "dur": 0.177, + "args": { + "External id": 245972,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769648.578, "dur": 0.240, + "args": { + "External id": 245973,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769649.762, "dur": 1.438, + "args": { + "External id": 245974,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769691.955, "dur": 0.676, + "args": { + "External id": 245975,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917769710.817, "dur": 38.267, + "args": { + "External id": 245976,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 4807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917769785.020, "dur": 114.235, + "args": { + "External id": 245977,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 4808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917769796.560, "dur": 4.910, + "args": { + "External id": 245978,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917769806.510, "dur": 12.280, + "args": { + "External id": 245979,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 4810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917769811.134, "dur": 7.216, + "args": { + "External id": 245980,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 4811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769816.486, "dur": 0.715, + "args": { + "External id": 245981,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 4812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917769825.633, "dur": 30.236, + "args": { + "External id": 245982,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 4813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769827.217, "dur": 0.486, + "args": { + "External id": 245983,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769830.980, "dur": 0.181, + "args": { + "External id": 245984,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769834.243, "dur": 0.374, + "args": { + "External id": 245985,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769835.518, "dur": 0.255, + "args": { + "External id": 245986,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769839.099, "dur": 3.880, + "args": { + "External id": 245987,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769843.710, "dur": 0.197, + "args": { + "External id": 245988,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769846.733, "dur": 0.276, + "args": { + "External id": 245989,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769850.167, "dur": 0.274, + "args": { + "External id": 245990,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917769851.056, "dur": 0.167, + "args": { + "External id": 245991,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917769868.303, "dur": 22.234, + "args": { + "External id": 245992,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 4823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917769950.733, "dur": 364.274, + "args": { + "External id": 245993,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 4824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917769979.220, "dur": 330.361, + "args": { + "External id": 245994,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4825, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917769991.264, "dur": 312.113, + "args": { + "External id": 245995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 4826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917770342.212, "dur": 2.416, + "args": { + "External id": 245996,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4827, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 4183438, "tid": 4183438, + "ts": 667917770435.225, "dur": 5479.699, + "args": { + "External id": 245997,"Record function id": 0, "Ev Idx": 4828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917770527.961, "dur": 6.961, + "args": { + "External id": 245998,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 4829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917770538.041, "dur": 0.963, + "args": { + "External id": 245999,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917770540.635, "dur": 0.789, + "args": { + "External id": 246000,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917770543.141, "dur": 1.700, + "args": { + "External id": 246001,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917770546.241, "dur": 0.821, + "args": { + "External id": 246002,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917770550.495, "dur": 0.817, + "args": { + "External id": 246003,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917770552.752, "dur": 0.990, + "args": { + "External id": 246004,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917770555.117, "dur": 1.675, + "args": { + "External id": 246005,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917770561.041, "dur": 0.561, + "args": { + "External id": 246006,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917770565.224, "dur": 0.520, + "args": { + "External id": 246007,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917770582.761, "dur": 5294.870, + "args": { + "External id": 246008,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 4839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917770601.089, "dur": 5269.427, + "args": { + "External id": 246009,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 4840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917770622.410, "dur": 14.511, + "args": { + "External id": 246010,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917770639.558, "dur": 5198.935, + "args": { + "External id": 246011,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 4842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917770642.185, "dur": 5195.678, + "args": { + "External id": 246012,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 4843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917770648.626, "dur": 45.926, + "args": { + "External id": 246013,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917770697.346, "dur": 5137.514, + "args": { + "External id": 246014,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 4845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917776057.061, "dur": 31.955, + "args": { + "External id": 246015,"Sequence number": 2987533, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 4846 + } + }, + { + "ph": "s", "id": 211, "pid": 4183438, "tid": 4183438, "ts": 667917776057.061, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917776073.276, "dur": 10.828, + "args": { + "External id": 246016,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 4847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917776079.442, "dur": 4.362, + "args": { + "External id": 246017,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 4848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917776152.019, "dur": 110.297, + "args": { + "External id": 246018,"Record function id": 0, "Ev Idx": 4849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917776265.164, "dur": 1129.232, + "args": { + "External id": 246019,"Record function id": 0, "Ev Idx": 4850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917776309.142, "dur": 1069.803, + "args": { + "External id": 246020,"Sequence number": 2987534, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 4851 + } + }, + { + "ph": "s", "id": 210, "pid": 4183438, "tid": 4183438, "ts": 667917776309.142, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917776377.596, "dur": 46.435, + "args": { + "External id": 246021,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 4852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917776437.331, "dur": 93.290, + "args": { + "External id": 246022,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917776541.514, "dur": 34.809, + "args": { + "External id": 246023,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917776585.973, "dur": 29.920, + "args": { + "External id": 246024,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917776641.783, "dur": 64.972, + "args": { + "External id": 246025,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917776731.007, "dur": 19.619, + "args": { + "External id": 246026,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917776771.150, "dur": 131.026, + "args": { + "External id": 246027,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 4858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917776823.428, "dur": 11.530, + "args": { + "External id": 246028,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 4859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917776828.217, "dur": 6.108, + "args": { + "External id": 246029,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917776837.523, "dur": 3.550, + "args": { + "External id": 246030,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917776844.535, "dur": 0.884, + "args": { + "External id": 246031,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917776847.878, "dur": 3.928, + "args": { + "External id": 246032,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917776913.575, "dur": 51.151, + "args": { + "External id": 246033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917776998.449, "dur": 28.919, + "args": { + "External id": 246034,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 4865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917777034.183, "dur": 38.562, + "args": { + "External id": 246035,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 4866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917777083.571, "dur": 47.913, + "args": { + "External id": 246036,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 4867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917777153.321, "dur": 22.882, + "args": { + "External id": 246037,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 4868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917777182.231, "dur": 67.975, + "args": { + "External id": 246038,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 4869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917777278.613, "dur": 19.814, + "args": { + "External id": 246039,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 4870 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 4183438, "tid": 4183438, + "ts": 667917777463.065, "dur": 82.021, + "args": { + "External id": 246040,"Record function id": 0, "Ev Idx": 4871 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917777616.285, "dur": 100.497, + "args": { + "External id": 246041,"Record function id": 0, "Ev Idx": 4872 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 4183438, "tid": 4183438, + "ts": 667917777728.294, "dur": 9695.583, + "args": { + "External id": 246042,"Record function id": 0, "Ev Idx": 4873 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 4183438, "tid": 4183438, + "ts": 667917777740.507, "dur": 1007.903, + "args": { + "External id": 246043,"Record function id": 0, "Ev Idx": 4874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917777822.407, "dur": 9.694, + "args": { + "External id": 246044,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917777845.473, "dur": 42.691, + "args": { + "External id": 246045,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 4876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917777852.764, "dur": 3.871, + "args": { + "External id": 246046,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917777858.472, "dur": 0.379, + "args": { + "External id": 246047,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917777859.913, "dur": 0.679, + "args": { + "External id": 246048,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917777866.157, "dur": 0.195, + "args": { + "External id": 246049,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917777867.299, "dur": 1.406, + "args": { + "External id": 246050,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917777871.319, "dur": 0.187, + "args": { + "External id": 246051,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917777874.750, "dur": 0.161, + "args": { + "External id": 246052,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917777875.703, "dur": 0.169, + "args": { + "External id": 246053,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917777878.676, "dur": 2.826, + "args": { + "External id": 246054,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917777898.652, "dur": 43.077, + "args": { + "External id": 246055,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 4886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917777979.052, "dur": 118.500, + "args": { + "External id": 246056,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 4887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917777990.183, "dur": 4.087, + "args": { + "External id": 246057,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917778000.428, "dur": 12.241, + "args": { + "External id": 246058,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 4889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917778005.015, "dur": 7.224, + "args": { + "External id": 246059,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 4890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917778010.417, "dur": 0.622, + "args": { + "External id": 246060,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 4891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917778019.121, "dur": 30.256, + "args": { + "External id": 246061,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 4892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917778020.596, "dur": 0.331, + "args": { + "External id": 246062,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917778024.488, "dur": 1.750, + "args": { + "External id": 246063,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917778027.033, "dur": 2.797, + "args": { + "External id": 246064,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917778032.773, "dur": 0.282, + "args": { + "External id": 246065,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917778033.807, "dur": 0.292, + "args": { + "External id": 246066,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917778037.265, "dur": 0.171, + "args": { + "External id": 246067,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917778040.520, "dur": 0.271, + "args": { + "External id": 246068,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917778041.527, "dur": 0.322, + "args": { + "External id": 246069,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917778045.211, "dur": 0.196, + "args": { + "External id": 246070,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917778064.270, "dur": 24.234, + "args": { + "External id": 246071,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 4902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917778151.553, "dur": 431.930, + "args": { + "External id": 246072,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 4903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917778181.925, "dur": 396.298, + "args": { + "External id": 246073,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4904, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917778223.857, "dur": 349.230, + "args": { + "External id": 246074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 4905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917778607.936, "dur": 2.400, + "args": { + "External id": 246075,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4906, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 4183438, "tid": 4183438, + "ts": 667917778773.065, "dur": 8456.830, + "args": { + "External id": 246076,"Record function id": 0, "Ev Idx": 4907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917778869.487, "dur": 6.723, + "args": { + "External id": 246077,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 4908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917778879.611, "dur": 1.260, + "args": { + "External id": 246078,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917778882.804, "dur": 1.013, + "args": { + "External id": 246079,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917778886.017, "dur": 0.791, + "args": { + "External id": 246080,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917778888.384, "dur": 0.965, + "args": { + "External id": 246081,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917778893.190, "dur": 0.858, + "args": { + "External id": 246082,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917778895.477, "dur": 0.773, + "args": { + "External id": 246083,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917778899.885, "dur": 3.074, + "args": { + "External id": 246084,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917778904.636, "dur": 0.601, + "args": { + "External id": 246085,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917778908.676, "dur": 0.613, + "args": { + "External id": 246086,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917778927.102, "dur": 8251.410, + "args": { + "External id": 246087,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 4918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917778945.209, "dur": 8226.248, + "args": { + "External id": 246088,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 4919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917778965.582, "dur": 14.381, + "args": { + "External id": 246089,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917778982.744, "dur": 8154.958, + "args": { + "External id": 246090,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 4921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917778985.317, "dur": 8151.865, + "args": { + "External id": 246091,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 4922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917778991.377, "dur": 5.146, + "args": { + "External id": 246092,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917778998.206, "dur": 8135.596, + "args": { + "External id": 246093,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 4924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917787364.848, "dur": 31.345, + "args": { + "External id": 246094,"Sequence number": 2987535, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 4925 + } + }, + { + "ph": "s", "id": 209, "pid": 4183438, "tid": 4183438, "ts": 667917787364.848, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917787380.884, "dur": 10.479, + "args": { + "External id": 246095,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 4926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917787386.601, "dur": 4.508, + "args": { + "External id": 246096,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 4927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917787463.609, "dur": 91.207, + "args": { + "External id": 246097,"Record function id": 0, "Ev Idx": 4928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917787556.453, "dur": 1180.391, + "args": { + "External id": 246098,"Record function id": 0, "Ev Idx": 4929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917787597.618, "dur": 1122.944, + "args": { + "External id": 246099,"Sequence number": 2987536, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 4930 + } + }, + { + "ph": "s", "id": 208, "pid": 4183438, "tid": 4183438, "ts": 667917787597.618, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917787727.487, "dur": 46.642, + "args": { + "External id": 246100,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 4931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917787793.872, "dur": 95.722, + "args": { + "External id": 246101,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917787897.634, "dur": 35.250, + "args": { + "External id": 246102,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917787939.029, "dur": 30.178, + "args": { + "External id": 246103,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917788000.867, "dur": 23.796, + "args": { + "External id": 246104,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917788040.079, "dur": 18.169, + "args": { + "External id": 246105,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917788072.868, "dur": 163.614, + "args": { + "External id": 246106,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 4937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917788124.787, "dur": 13.894, + "args": { + "External id": 246107,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 4938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917788129.700, "dur": 8.096, + "args": { + "External id": 246108,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917788144.176, "dur": 3.746, + "args": { + "External id": 246109,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917788149.057, "dur": 3.381, + "args": { + "External id": 246110,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917788154.753, "dur": 2.506, + "args": { + "External id": 246111,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917788249.008, "dur": 59.490, + "args": { + "External id": 246112,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 4943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917788340.473, "dur": 28.974, + "args": { + "External id": 246113,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 4944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917788377.246, "dur": 39.199, + "args": { + "External id": 246114,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 4945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917788423.118, "dur": 32.384, + "args": { + "External id": 246115,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 4946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917788479.848, "dur": 25.036, + "args": { + "External id": 246116,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 4947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917788510.181, "dur": 50.608, + "args": { + "External id": 246117,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 4948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917788579.033, "dur": 21.532, + "args": { + "External id": 246118,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 4949 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 4183438, "tid": 4183438, + "ts": 667917788804.857, "dur": 78.979, + "args": { + "External id": 246119,"Record function id": 0, "Ev Idx": 4950 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917788957.290, "dur": 47.482, + "args": { + "External id": 246120,"Record function id": 0, "Ev Idx": 4951 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 4183438, "tid": 4183438, + "ts": 667917789013.710, "dur": 8970.940, + "args": { + "External id": 246121,"Record function id": 0, "Ev Idx": 4952 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 4183438, "tid": 4183438, + "ts": 667917789024.605, "dur": 1009.823, + "args": { + "External id": 246122,"Record function id": 0, "Ev Idx": 4953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917789100.225, "dur": 8.995, + "args": { + "External id": 246123,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917789122.137, "dur": 42.637, + "args": { + "External id": 246124,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 4955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789129.588, "dur": 2.181, + "args": { + "External id": 246125,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789136.332, "dur": 0.493, + "args": { + "External id": 246126,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789139.785, "dur": 0.338, + "args": { + "External id": 246127,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789140.888, "dur": 0.339, + "args": { + "External id": 246128,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789144.762, "dur": 0.430, + "args": { + "External id": 246129,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789148.374, "dur": 0.375, + "args": { + "External id": 246130,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789149.501, "dur": 3.842, + "args": { + "External id": 246131,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789154.180, "dur": 0.368, + "args": { + "External id": 246132,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789157.878, "dur": 0.508, + "args": { + "External id": 246133,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917789175.473, "dur": 77.843, + "args": { + "External id": 246134,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 4965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917789290.632, "dur": 123.710, + "args": { + "External id": 246135,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 4966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917789306.937, "dur": 5.134, + "args": { + "External id": 246136,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917789317.740, "dur": 10.738, + "args": { + "External id": 246137,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 4968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917789322.482, "dur": 5.581, + "args": { + "External id": 246138,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 4969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789325.745, "dur": 0.618, + "args": { + "External id": 246139,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 4970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917789335.191, "dur": 33.901, + "args": { + "External id": 246140,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 4971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789339.052, "dur": 2.855, + "args": { + "External id": 246141,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789342.766, "dur": 0.310, + "args": { + "External id": 246142,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789343.845, "dur": 0.385, + "args": { + "External id": 246143,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789350.097, "dur": 1.520, + "args": { + "External id": 246144,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789352.298, "dur": 0.324, + "args": { + "External id": 246145,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789353.712, "dur": 0.166, + "args": { + "External id": 246146,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789358.287, "dur": 0.461, + "args": { + "External id": 246147,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789359.595, "dur": 0.339, + "args": { + "External id": 246148,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917789362.449, "dur": 2.156, + "args": { + "External id": 246149,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 4980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917789380.357, "dur": 25.907, + "args": { + "External id": 246150,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 4981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917789470.054, "dur": 462.477, + "args": { + "External id": 246151,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 4982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917789506.148, "dur": 420.721, + "args": { + "External id": 246152,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4983, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917789516.191, "dur": 403.268, + "args": { + "External id": 246153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 4984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917789958.281, "dur": 2.294, + "args": { + "External id": 246154,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4985, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 4183438, "tid": 4183438, + "ts": 667917790057.643, "dur": 7736.166, + "args": { + "External id": 246155,"Record function id": 0, "Ev Idx": 4986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917790154.395, "dur": 7.269, + "args": { + "External id": 246156,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 4987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917790165.042, "dur": 0.878, + "args": { + "External id": 246157,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917790167.722, "dur": 2.329, + "args": { + "External id": 246158,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917790171.635, "dur": 0.765, + "args": { + "External id": 246159,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917790173.874, "dur": 1.022, + "args": { + "External id": 246160,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917790178.025, "dur": 0.955, + "args": { + "External id": 246161,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 4992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917790182.244, "dur": 0.842, + "args": { + "External id": 246162,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 4993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917790184.355, "dur": 1.925, + "args": { + "External id": 246163,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917790187.913, "dur": 0.744, + "args": { + "External id": 246164,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917790221.991, "dur": 1.496, + "args": { + "External id": 246165,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 4996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917790244.875, "dur": 7509.029, + "args": { + "External id": 246166,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 4997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917790263.764, "dur": 7482.942, + "args": { + "External id": 246167,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 4998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917790281.647, "dur": 15.470, + "args": { + "External id": 246168,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917790300.180, "dur": 7412.508, + "args": { + "External id": 246169,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 5000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917790302.501, "dur": 7409.494, + "args": { + "External id": 246170,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 5001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917790308.444, "dur": 5.175, + "args": { + "External id": 246171,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917790315.696, "dur": 7393.084, + "args": { + "External id": 246172,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 5003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917797928.841, "dur": 30.369, + "args": { + "External id": 246173,"Sequence number": 2987537, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 5004 + } + }, + { + "ph": "s", "id": 207, "pid": 4183438, "tid": 4183438, "ts": 667917797928.841, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917797944.819, "dur": 9.593, + "args": { + "External id": 246174,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 5005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917797950.216, "dur": 3.963, + "args": { + "External id": 246175,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 5006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917798024.082, "dur": 88.532, + "args": { + "External id": 246176,"Record function id": 0, "Ev Idx": 5007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917798114.256, "dur": 1134.121, + "args": { + "External id": 246177,"Record function id": 0, "Ev Idx": 5008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917798153.226, "dur": 1079.082, + "args": { + "External id": 246178,"Sequence number": 2987538, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 5009 + } + }, + { + "ph": "s", "id": 206, "pid": 4183438, "tid": 4183438, "ts": 667917798153.226, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917798240.272, "dur": 44.916, + "args": { + "External id": 246179,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917798299.984, "dur": 94.036, + "args": { + "External id": 246180,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917798404.827, "dur": 35.764, + "args": { + "External id": 246181,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917798449.470, "dur": 30.125, + "args": { + "External id": 246182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917798506.677, "dur": 26.706, + "args": { + "External id": 246183,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917798552.412, "dur": 13.253, + "args": { + "External id": 246184,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917798584.129, "dur": 176.143, + "args": { + "External id": 246185,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917798634.527, "dur": 11.940, + "args": { + "External id": 246186,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 5017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917798639.231, "dur": 6.489, + "args": { + "External id": 246187,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917798651.290, "dur": 44.598, + "args": { + "External id": 246188,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917798698.606, "dur": 1.306, + "args": { + "External id": 246189,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917798702.631, "dur": 3.431, + "args": { + "External id": 246190,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917798772.753, "dur": 52.546, + "args": { + "External id": 246191,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917798859.903, "dur": 27.026, + "args": { + "External id": 246192,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917798895.861, "dur": 40.399, + "args": { + "External id": 246193,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917798944.355, "dur": 47.454, + "args": { + "External id": 246194,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917799014.377, "dur": 23.858, + "args": { + "External id": 246195,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 5026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917799044.735, "dur": 52.279, + "args": { + "External id": 246196,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 5027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917799119.152, "dur": 20.356, + "args": { + "External id": 246197,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 5028 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 4183438, "tid": 4183438, + "ts": 667917799318.759, "dur": 80.947, + "args": { + "External id": 246198,"Record function id": 0, "Ev Idx": 5029 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917799473.873, "dur": 50.255, + "args": { + "External id": 246199,"Record function id": 0, "Ev Idx": 5030 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 4183438, "tid": 4183438, + "ts": 667917799533.012, "dur": 8508.070, + "args": { + "External id": 246200,"Record function id": 0, "Ev Idx": 5031 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 4183438, "tid": 4183438, + "ts": 667917799545.791, "dur": 985.604, + "args": { + "External id": 246201,"Record function id": 0, "Ev Idx": 5032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917799625.513, "dur": 9.311, + "args": { + "External id": 246202,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917799647.771, "dur": 88.835, + "args": { + "External id": 246203,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799696.060, "dur": 2.262, + "args": { + "External id": 246204,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799703.321, "dur": 0.645, + "args": { + "External id": 246205,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799706.809, "dur": 0.435, + "args": { + "External id": 246206,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799713.168, "dur": 0.338, + "args": { + "External id": 246207,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799716.950, "dur": 0.572, + "args": { + "External id": 246208,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799720.624, "dur": 0.474, + "args": { + "External id": 246209,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799721.875, "dur": 3.526, + "args": { + "External id": 246210,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799726.242, "dur": 0.197, + "args": { + "External id": 246211,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799729.939, "dur": 0.143, + "args": { + "External id": 246212,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917799748.146, "dur": 45.804, + "args": { + "External id": 246213,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917799830.680, "dur": 135.243, + "args": { + "External id": 246214,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 5045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917799846.024, "dur": 5.183, + "args": { + "External id": 246215,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917799856.893, "dur": 10.023, + "args": { + "External id": 246216,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917799861.680, "dur": 4.823, + "args": { + "External id": 246217,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 5048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799864.748, "dur": 0.523, + "args": { + "External id": 246218,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917799873.245, "dur": 35.623, + "args": { + "External id": 246219,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799877.094, "dur": 2.981, + "args": { + "External id": 246220,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799880.995, "dur": 0.386, + "args": { + "External id": 246221,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799882.262, "dur": 0.327, + "args": { + "External id": 246222,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799888.030, "dur": 1.551, + "args": { + "External id": 246223,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799890.588, "dur": 0.214, + "args": { + "External id": 246224,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799891.434, "dur": 0.179, + "args": { + "External id": 246225,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799897.577, "dur": 0.170, + "args": { + "External id": 246226,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799898.507, "dur": 0.176, + "args": { + "External id": 246227,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917799901.562, "dur": 2.885, + "args": { + "External id": 246228,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917799930.517, "dur": 26.917, + "args": { + "External id": 246229,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917800021.762, "dur": 409.132, + "args": { + "External id": 246230,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 5061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917800054.970, "dur": 370.404, + "args": { + "External id": 246231,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5062, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917800067.129, "dur": 352.142, + "args": { + "External id": 246232,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 5063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917800455.254, "dur": 2.339, + "args": { + "External id": 246233,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5064, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 4183438, "tid": 4183438, + "ts": 667917800554.324, "dur": 7286.135, + "args": { + "External id": 246234,"Record function id": 0, "Ev Idx": 5065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917800643.681, "dur": 7.091, + "args": { + "External id": 246235,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 5066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917800693.551, "dur": 1.931, + "args": { + "External id": 246236,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917800697.661, "dur": 2.230, + "args": { + "External id": 246237,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917800701.821, "dur": 0.894, + "args": { + "External id": 246238,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917800704.248, "dur": 0.940, + "args": { + "External id": 246239,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917800708.595, "dur": 0.690, + "args": { + "External id": 246240,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917800712.777, "dur": 0.863, + "args": { + "External id": 246241,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917800714.896, "dur": 2.057, + "args": { + "External id": 246242,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917800718.161, "dur": 0.765, + "args": { + "External id": 246243,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917800722.506, "dur": 0.507, + "args": { + "External id": 246244,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917800742.335, "dur": 7057.062, + "args": { + "External id": 246245,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917800760.455, "dur": 7031.874, + "args": { + "External id": 246246,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917800778.377, "dur": 15.384, + "args": { + "External id": 246247,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917800796.930, "dur": 6960.282, + "args": { + "External id": 246248,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 5079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917800799.914, "dur": 6956.838, + "args": { + "External id": 246249,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 5080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917800805.863, "dur": 6.424, + "args": { + "External id": 246250,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917800814.087, "dur": 6939.666, + "args": { + "External id": 246251,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 5082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917807974.345, "dur": 40.325, + "args": { + "External id": 246252,"Sequence number": 2987539, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 5083 + } + }, + { + "ph": "s", "id": 205, "pid": 4183438, "tid": 4183438, "ts": 667917807974.345, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917807999.680, "dur": 10.335, + "args": { + "External id": 246253,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 5084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917808005.166, "dur": 4.608, + "args": { + "External id": 246254,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 5085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917808081.033, "dur": 87.640, + "args": { + "External id": 246255,"Record function id": 0, "Ev Idx": 5086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917808170.393, "dur": 1162.118, + "args": { + "External id": 246256,"Record function id": 0, "Ev Idx": 5087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917808228.299, "dur": 1088.535, + "args": { + "External id": 246257,"Sequence number": 2987540, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 5088 + } + }, + { + "ph": "s", "id": 204, "pid": 4183438, "tid": 4183438, "ts": 667917808228.299, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917808300.789, "dur": 46.282, + "args": { + "External id": 246258,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917808359.753, "dur": 96.635, + "args": { + "External id": 246259,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917808469.458, "dur": 35.534, + "args": { + "External id": 246260,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917808511.170, "dur": 29.956, + "args": { + "External id": 246261,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917808572.778, "dur": 27.337, + "args": { + "External id": 246262,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917808616.400, "dur": 16.488, + "args": { + "External id": 246263,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917808650.192, "dur": 181.510, + "args": { + "External id": 246264,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917808743.750, "dur": 12.671, + "args": { + "External id": 246265,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 5096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917808748.505, "dur": 6.840, + "args": { + "External id": 246266,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917808761.010, "dur": 5.034, + "args": { + "External id": 246267,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917808767.279, "dur": 0.877, + "args": { + "External id": 246268,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917808772.896, "dur": 3.685, + "args": { + "External id": 246269,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917808843.973, "dur": 51.866, + "args": { + "External id": 246270,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917808925.897, "dur": 28.800, + "args": { + "External id": 246271,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917808963.130, "dur": 38.663, + "args": { + "External id": 246272,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917809034.195, "dur": 48.353, + "args": { + "External id": 246273,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917809109.270, "dur": 26.728, + "args": { + "External id": 246274,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 5105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917809141.432, "dur": 43.908, + "args": { + "External id": 246275,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 5106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917809220.805, "dur": 23.964, + "args": { + "External id": 246276,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 5107 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 4183438, "tid": 4183438, + "ts": 667917809398.919, "dur": 77.990, + "args": { + "External id": 246277,"Record function id": 0, "Ev Idx": 5108 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917809549.932, "dur": 48.293, + "args": { + "External id": 246278,"Record function id": 0, "Ev Idx": 5109 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 4183438, "tid": 4183438, + "ts": 667917809607.009, "dur": 43668.594, + "args": { + "External id": 246279,"Record function id": 0, "Ev Idx": 5110 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 4183438, "tid": 4183438, + "ts": 667917809618.069, "dur": 942.253, + "args": { + "External id": 246280,"Record function id": 0, "Ev Idx": 5111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917809739.200, "dur": 10.452, + "args": { + "External id": 246281,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917809762.933, "dur": 41.310, + "args": { + "External id": 246282,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809770.309, "dur": 2.195, + "args": { + "External id": 246283,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809776.418, "dur": 0.288, + "args": { + "External id": 246284,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809779.438, "dur": 0.323, + "args": { + "External id": 246285,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809780.499, "dur": 0.557, + "args": { + "External id": 246286,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809784.207, "dur": 0.217, + "args": { + "External id": 246287,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809787.508, "dur": 0.411, + "args": { + "External id": 246288,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809788.813, "dur": 3.788, + "args": { + "External id": 246289,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809793.470, "dur": 0.458, + "args": { + "External id": 246290,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809796.387, "dur": 0.165, + "args": { + "External id": 246291,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917809814.628, "dur": 45.692, + "args": { + "External id": 246292,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917809895.073, "dur": 113.626, + "args": { + "External id": 246293,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 5124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917809909.787, "dur": 3.891, + "args": { + "External id": 246294,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917809919.207, "dur": 9.756, + "args": { + "External id": 246295,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917809923.859, "dur": 4.659, + "args": { + "External id": 246296,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 5127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809926.728, "dur": 0.518, + "args": { + "External id": 246297,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917809935.385, "dur": 31.448, + "args": { + "External id": 246298,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809938.884, "dur": 2.751, + "args": { + "External id": 246299,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809942.539, "dur": 0.329, + "args": { + "External id": 246300,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809943.896, "dur": 0.326, + "args": { + "External id": 246301,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809949.456, "dur": 1.328, + "args": { + "External id": 246302,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809951.442, "dur": 0.331, + "args": { + "External id": 246303,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809952.505, "dur": 0.154, + "args": { + "External id": 246304,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809956.522, "dur": 0.297, + "args": { + "External id": 246305,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809957.792, "dur": 0.144, + "args": { + "External id": 246306,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917809959.828, "dur": 2.454, + "args": { + "External id": 246307,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917809976.397, "dur": 24.229, + "args": { + "External id": 246308,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917810061.443, "dur": 400.477, + "args": { + "External id": 246309,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 5140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917810096.501, "dur": 359.574, + "args": { + "External id": 246310,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5141, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917810106.869, "dur": 343.245, + "args": { + "External id": 246311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 5142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917810485.334, "dur": 2.268, + "args": { + "External id": 246312,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5143, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 4183438, "tid": 4183438, + "ts": 667917810582.065, "dur": 42478.054, + "args": { + "External id": 246313,"Record function id": 0, "Ev Idx": 5144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917810717.548, "dur": 6.734, + "args": { + "External id": 246314,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 5145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917810727.807, "dur": 1.069, + "args": { + "External id": 246315,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917810730.430, "dur": 2.266, + "args": { + "External id": 246316,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917810734.156, "dur": 0.789, + "args": { + "External id": 246317,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917810736.267, "dur": 0.634, + "args": { + "External id": 246318,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917810738.288, "dur": 1.005, + "args": { + "External id": 246319,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917810743.152, "dur": 0.714, + "args": { + "External id": 246320,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917810745.033, "dur": 2.031, + "args": { + "External id": 246321,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917810748.217, "dur": 0.650, + "args": { + "External id": 246322,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917810750.155, "dur": 0.652, + "args": { + "External id": 246323,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917810770.511, "dur": 42244.651, + "args": { + "External id": 246324,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917810787.282, "dur": 42219.789, + "args": { + "External id": 246325,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917810804.585, "dur": 15.539, + "args": { + "External id": 246326,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917810822.770, "dur": 42150.303, + "args": { + "External id": 246327,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 5158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917810825.227, "dur": 42147.235, + "args": { + "External id": 246328,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 5159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917810831.514, "dur": 4.634, + "args": { + "External id": 246329,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917810838.320, "dur": 42130.836, + "args": { + "External id": 246330,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 5161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917853214.564, "dur": 33.406, + "args": { + "External id": 246331,"Sequence number": 2987541, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 5162 + } + }, + { + "ph": "s", "id": 203, "pid": 4183438, "tid": 4183438, "ts": 667917853214.564, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917853232.404, "dur": 10.771, + "args": { + "External id": 246332,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 5163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917853237.955, "dur": 4.876, + "args": { + "External id": 246333,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 5164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917853318.472, "dur": 92.690, + "args": { + "External id": 246334,"Record function id": 0, "Ev Idx": 5165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917853412.621, "dur": 1125.294, + "args": { + "External id": 246335,"Record function id": 0, "Ev Idx": 5166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917853455.327, "dur": 1068.076, + "args": { + "External id": 246336,"Sequence number": 2987542, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 5167 + } + }, + { + "ph": "s", "id": 202, "pid": 4183438, "tid": 4183438, "ts": 667917853455.327, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917853522.920, "dur": 46.074, + "args": { + "External id": 246337,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917853582.823, "dur": 134.800, + "args": { + "External id": 246338,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917853731.236, "dur": 40.436, + "args": { + "External id": 246339,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917853781.758, "dur": 30.177, + "args": { + "External id": 246340,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917853842.113, "dur": 26.981, + "args": { + "External id": 246341,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917853886.792, "dur": 16.026, + "args": { + "External id": 246342,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917853922.472, "dur": 135.592, + "args": { + "External id": 246343,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917853974.263, "dur": 11.450, + "args": { + "External id": 246344,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 5175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917853979.259, "dur": 5.620, + "args": { + "External id": 246345,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917853990.190, "dur": 6.663, + "args": { + "External id": 246346,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917853998.257, "dur": 0.872, + "args": { + "External id": 246347,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917854001.355, "dur": 3.789, + "args": { + "External id": 246348,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917854069.993, "dur": 49.454, + "args": { + "External id": 246349,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917854150.321, "dur": 29.718, + "args": { + "External id": 246350,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917854188.160, "dur": 55.222, + "args": { + "External id": 246351,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917854254.872, "dur": 33.236, + "args": { + "External id": 246352,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917854314.417, "dur": 29.678, + "args": { + "External id": 246353,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 5184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917854349.717, "dur": 59.023, + "args": { + "External id": 246354,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 5185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917854427.351, "dur": 20.292, + "args": { + "External id": 246355,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 5186 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 4183438, "tid": 4183438, + "ts": 667917854605.441, "dur": 119.354, + "args": { + "External id": 246356,"Record function id": 0, "Ev Idx": 5187 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917854805.159, "dur": 49.940, + "args": { + "External id": 246357,"Record function id": 0, "Ev Idx": 5188 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 4183438, "tid": 4183438, + "ts": 667917854864.346, "dur": 8530.036, + "args": { + "External id": 246358,"Record function id": 0, "Ev Idx": 5189 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 4183438, "tid": 4183438, + "ts": 667917854875.648, "dur": 965.366, + "args": { + "External id": 246359,"Record function id": 0, "Ev Idx": 5190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917854958.617, "dur": 9.626, + "args": { + "External id": 246360,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917854981.420, "dur": 42.130, + "args": { + "External id": 246361,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917854988.878, "dur": 2.147, + "args": { + "External id": 246362,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917854996.041, "dur": 0.297, + "args": { + "External id": 246363,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917854999.441, "dur": 0.482, + "args": { + "External id": 246364,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917855000.811, "dur": 0.489, + "args": { + "External id": 246365,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917855004.417, "dur": 0.350, + "args": { + "External id": 246366,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917855007.919, "dur": 0.405, + "args": { + "External id": 246367,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917855008.997, "dur": 2.863, + "args": { + "External id": 246368,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917855012.776, "dur": 0.238, + "args": { + "External id": 246369,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917855015.817, "dur": 0.227, + "args": { + "External id": 246370,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917855035.247, "dur": 48.193, + "args": { + "External id": 246371,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917855117.328, "dur": 147.855, + "args": { + "External id": 246372,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 5203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917855132.689, "dur": 4.165, + "args": { + "External id": 246373,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917855142.229, "dur": 10.004, + "args": { + "External id": 246374,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917855146.996, "dur": 4.832, + "args": { + "External id": 246375,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 5206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917855150.210, "dur": 0.499, + "args": { + "External id": 246376,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917855158.556, "dur": 50.462, + "args": { + "External id": 246377,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917855162.449, "dur": 2.972, + "args": { + "External id": 246378,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917855166.472, "dur": 0.228, + "args": { + "External id": 246379,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917855167.505, "dur": 0.267, + "args": { + "External id": 246380,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917855173.494, "dur": 1.771, + "args": { + "External id": 246381,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917855175.960, "dur": 0.270, + "args": { + "External id": 246382,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917855177.035, "dur": 0.263, + "args": { + "External id": 246383,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917855182.259, "dur": 0.258, + "args": { + "External id": 246384,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917855183.351, "dur": 0.272, + "args": { + "External id": 246385,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917855186.000, "dur": 2.605, + "args": { + "External id": 246386,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917855227.523, "dur": 28.966, + "args": { + "External id": 246387,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917855320.620, "dur": 416.758, + "args": { + "External id": 246388,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 5219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917855354.666, "dur": 376.701, + "args": { + "External id": 246389,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5220, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917855365.099, "dur": 359.111, + "args": { + "External id": 246390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 5221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917855763.596, "dur": 2.739, + "args": { + "External id": 246391,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5222, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 4183438, "tid": 4183438, + "ts": 667917855862.956, "dur": 7324.435, + "args": { + "External id": 246392,"Record function id": 0, "Ev Idx": 5223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917855962.533, "dur": 6.894, + "args": { + "External id": 246393,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 5224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917855972.779, "dur": 1.155, + "args": { + "External id": 246394,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917855975.707, "dur": 2.689, + "args": { + "External id": 246395,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917855980.259, "dur": 0.841, + "args": { + "External id": 246396,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917855982.300, "dur": 0.728, + "args": { + "External id": 246397,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917855984.390, "dur": 1.186, + "args": { + "External id": 246398,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917855989.959, "dur": 0.998, + "args": { + "External id": 246399,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917855992.400, "dur": 1.898, + "args": { + "External id": 246400,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917855995.406, "dur": 0.719, + "args": { + "External id": 246401,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917855997.364, "dur": 0.830, + "args": { + "External id": 246402,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917856018.103, "dur": 7130.427, + "args": { + "External id": 246403,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917856044.962, "dur": 7096.867, + "args": { + "External id": 246404,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917856059.087, "dur": 15.726, + "args": { + "External id": 246405,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917856077.624, "dur": 7032.207, + "args": { + "External id": 246406,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 5237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917856080.238, "dur": 7029.162, + "args": { + "External id": 246407,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 5238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917856086.852, "dur": 4.801, + "args": { + "External id": 246408,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917856093.330, "dur": 7012.967, + "args": { + "External id": 246409,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 5240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917863336.495, "dur": 33.740, + "args": { + "External id": 246410,"Sequence number": 2987543, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 5241 + } + }, + { + "ph": "s", "id": 201, "pid": 4183438, "tid": 4183438, "ts": 667917863336.495, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917863357.063, "dur": 8.558, + "args": { + "External id": 246411,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 5242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917863360.640, "dur": 4.612, + "args": { + "External id": 246412,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 5243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917863432.942, "dur": 88.836, + "args": { + "External id": 246413,"Record function id": 0, "Ev Idx": 5244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917863523.404, "dur": 1184.054, + "args": { + "External id": 246414,"Record function id": 0, "Ev Idx": 5245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917863568.003, "dur": 1122.048, + "args": { + "External id": 246415,"Sequence number": 2987544, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 5246 + } + }, + { + "ph": "s", "id": 200, "pid": 4183438, "tid": 4183438, "ts": 667917863568.003, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917863636.268, "dur": 80.259, + "args": { + "External id": 246416,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917863733.899, "dur": 96.638, + "args": { + "External id": 246417,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917863840.357, "dur": 35.558, + "args": { + "External id": 246418,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917863886.934, "dur": 30.469, + "args": { + "External id": 246419,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917863944.390, "dur": 27.210, + "args": { + "External id": 246420,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917863989.522, "dur": 18.487, + "args": { + "External id": 246421,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917864030.973, "dur": 132.343, + "args": { + "External id": 246422,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917864081.784, "dur": 12.712, + "args": { + "External id": 246423,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 5254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917864087.284, "dur": 6.472, + "args": { + "External id": 246424,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917864097.410, "dur": 4.558, + "args": { + "External id": 246425,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917864103.306, "dur": 0.969, + "args": { + "External id": 246426,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917864106.699, "dur": 3.451, + "args": { + "External id": 246427,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917864174.456, "dur": 73.256, + "args": { + "External id": 246428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917864287.875, "dur": 32.604, + "args": { + "External id": 246429,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917864329.432, "dur": 42.094, + "args": { + "External id": 246430,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917864380.648, "dur": 47.362, + "args": { + "External id": 246431,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917864449.487, "dur": 29.311, + "args": { + "External id": 246432,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 5263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917864484.687, "dur": 53.362, + "args": { + "External id": 246433,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 5264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917864559.994, "dur": 20.879, + "args": { + "External id": 246434,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 5265 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 4183438, "tid": 4183438, + "ts": 667917864776.402, "dur": 80.877, + "args": { + "External id": 246435,"Record function id": 0, "Ev Idx": 5266 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917864932.305, "dur": 52.139, + "args": { + "External id": 246436,"Record function id": 0, "Ev Idx": 5267 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 4183438, "tid": 4183438, + "ts": 667917864993.334, "dur": 9303.119, + "args": { + "External id": 246437,"Record function id": 0, "Ev Idx": 5268 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 4183438, "tid": 4183438, + "ts": 667917865002.331, "dur": 1052.589, + "args": { + "External id": 246438,"Record function id": 0, "Ev Idx": 5269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917865085.806, "dur": 9.837, + "args": { + "External id": 246439,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917865109.349, "dur": 38.918, + "args": { + "External id": 246440,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865115.269, "dur": 2.225, + "args": { + "External id": 246441,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865122.515, "dur": 0.340, + "args": { + "External id": 246442,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865124.560, "dur": 0.281, + "args": { + "External id": 246443,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865126.345, "dur": 0.299, + "args": { + "External id": 246444,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865130.211, "dur": 0.408, + "args": { + "External id": 246445,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865131.824, "dur": 0.434, + "args": { + "External id": 246446,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865133.711, "dur": 3.725, + "args": { + "External id": 246447,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865138.840, "dur": 0.188, + "args": { + "External id": 246448,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865140.936, "dur": 0.263, + "args": { + "External id": 246449,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917865160.223, "dur": 67.846, + "args": { + "External id": 246450,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917865267.474, "dur": 127.206, + "args": { + "External id": 246451,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 5282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917865281.633, "dur": 4.611, + "args": { + "External id": 246452,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917865291.936, "dur": 11.145, + "args": { + "External id": 246453,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917865296.509, "dur": 6.167, + "args": { + "External id": 246454,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 5285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865300.493, "dur": 0.552, + "args": { + "External id": 246455,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917865310.339, "dur": 33.558, + "args": { + "External id": 246456,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865312.753, "dur": 2.835, + "args": { + "External id": 246457,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865317.210, "dur": 0.320, + "args": { + "External id": 246458,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865319.207, "dur": 0.353, + "args": { + "External id": 246459,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865323.217, "dur": 1.923, + "args": { + "External id": 246460,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865326.421, "dur": 0.209, + "args": { + "External id": 246461,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865328.018, "dur": 0.394, + "args": { + "External id": 246462,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865332.483, "dur": 0.152, + "args": { + "External id": 246463,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865334.065, "dur": 0.219, + "args": { + "External id": 246464,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917865335.825, "dur": 2.500, + "args": { + "External id": 246465,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917865358.896, "dur": 27.170, + "args": { + "External id": 246466,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917865448.834, "dur": 499.832, + "args": { + "External id": 246467,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 5298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917865485.090, "dur": 457.575, + "args": { + "External id": 246468,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5299, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917865496.099, "dur": 439.449, + "args": { + "External id": 246469,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 5300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917865976.198, "dur": 2.695, + "args": { + "External id": 246470,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5301, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 4183438, "tid": 4183438, + "ts": 667917866077.167, "dur": 8012.691, + "args": { + "External id": 246471,"Record function id": 0, "Ev Idx": 5302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917866173.556, "dur": 7.161, + "args": { + "External id": 246472,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 5303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917866184.546, "dur": 1.255, + "args": { + "External id": 246473,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917866187.761, "dur": 3.038, + "args": { + "External id": 246474,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917866215.473, "dur": 1.577, + "args": { + "External id": 246475,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917866220.366, "dur": 0.947, + "args": { + "External id": 246476,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917866222.837, "dur": 1.107, + "args": { + "External id": 246477,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917866227.493, "dur": 0.869, + "args": { + "External id": 246478,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917866229.815, "dur": 2.725, + "args": { + "External id": 246479,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917866234.158, "dur": 1.213, + "args": { + "External id": 246480,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917866237.824, "dur": 0.954, + "args": { + "External id": 246481,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917866260.467, "dur": 7784.297, + "args": { + "External id": 246482,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917866277.947, "dur": 7760.197, + "args": { + "External id": 246483,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917866296.808, "dur": 16.014, + "args": { + "External id": 246484,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917866315.566, "dur": 7689.011, + "args": { + "External id": 246485,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 5316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917866318.076, "dur": 7685.932, + "args": { + "External id": 246486,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 5317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917866324.782, "dur": 8.203, + "args": { + "External id": 246487,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917866334.879, "dur": 7665.794, + "args": { + "External id": 246488,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 5319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917874239.311, "dur": 30.488, + "args": { + "External id": 246489,"Sequence number": 2987545, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 5320 + } + }, + { + "ph": "s", "id": 199, "pid": 4183438, "tid": 4183438, "ts": 667917874239.311, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917874256.856, "dur": 8.001, + "args": { + "External id": 246490,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 5321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917874260.382, "dur": 4.130, + "args": { + "External id": 246491,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 5322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917874337.007, "dur": 90.553, + "args": { + "External id": 246492,"Record function id": 0, "Ev Idx": 5323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917874429.752, "dur": 1103.551, + "args": { + "External id": 246493,"Record function id": 0, "Ev Idx": 5324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917874474.096, "dur": 1043.868, + "args": { + "External id": 246494,"Sequence number": 2987546, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 5325 + } + }, + { + "ph": "s", "id": 198, "pid": 4183438, "tid": 4183438, "ts": 667917874474.096, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917874545.258, "dur": 48.141, + "args": { + "External id": 246495,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917874606.455, "dur": 137.160, + "args": { + "External id": 246496,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917874758.101, "dur": 40.026, + "args": { + "External id": 246497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917874808.140, "dur": 29.046, + "args": { + "External id": 246498,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917874864.116, "dur": 29.156, + "args": { + "External id": 246499,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917874910.189, "dur": 14.599, + "args": { + "External id": 246500,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917874944.729, "dur": 129.590, + "args": { + "External id": 246501,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917874993.949, "dur": 11.465, + "args": { + "External id": 246502,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 5333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917874999.175, "dur": 5.489, + "args": { + "External id": 246503,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917875008.298, "dur": 6.816, + "args": { + "External id": 246504,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917875016.608, "dur": 0.812, + "args": { + "External id": 246505,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917875019.834, "dur": 4.158, + "args": { + "External id": 246506,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917875085.473, "dur": 45.277, + "args": { + "External id": 246507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917875161.569, "dur": 29.492, + "args": { + "External id": 246508,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917875218.497, "dur": 42.525, + "args": { + "External id": 246509,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917875269.964, "dur": 31.985, + "args": { + "External id": 246510,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917875327.431, "dur": 24.957, + "args": { + "External id": 246511,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 5342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917875358.632, "dur": 43.254, + "args": { + "External id": 246512,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 5343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917875422.555, "dur": 18.797, + "args": { + "External id": 246513,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 5344 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 4183438, "tid": 4183438, + "ts": 667917875602.227, "dur": 122.325, + "args": { + "External id": 246514,"Record function id": 0, "Ev Idx": 5345 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917875805.651, "dur": 50.184, + "args": { + "External id": 246515,"Record function id": 0, "Ev Idx": 5346 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 4183438, "tid": 4183438, + "ts": 667917875865.932, "dur": 8541.257, + "args": { + "External id": 246516,"Record function id": 0, "Ev Idx": 5347 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 4183438, "tid": 4183438, + "ts": 667917875873.963, "dur": 960.522, + "args": { + "External id": 246517,"Record function id": 0, "Ev Idx": 5348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917875956.045, "dur": 9.691, + "args": { + "External id": 246518,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917875979.425, "dur": 40.424, + "args": { + "External id": 246519,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917875985.131, "dur": 2.327, + "args": { + "External id": 246520,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917875992.252, "dur": 0.381, + "args": { + "External id": 246521,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917875994.336, "dur": 0.454, + "args": { + "External id": 246522,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917875996.679, "dur": 0.295, + "args": { + "External id": 246523,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917876000.445, "dur": 0.337, + "args": { + "External id": 246524,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917876002.670, "dur": 0.579, + "args": { + "External id": 246525,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917876004.952, "dur": 3.997, + "args": { + "External id": 246526,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917876010.678, "dur": 0.243, + "args": { + "External id": 246527,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917876012.154, "dur": 0.255, + "args": { + "External id": 246528,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917876032.594, "dur": 45.253, + "args": { + "External id": 246529,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917876113.576, "dur": 142.009, + "args": { + "External id": 246530,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 5361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917876127.003, "dur": 3.496, + "args": { + "External id": 246531,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917876135.922, "dur": 10.904, + "args": { + "External id": 246532,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917876140.617, "dur": 5.799, + "args": { + "External id": 246533,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 5364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917876144.725, "dur": 0.556, + "args": { + "External id": 246534,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917876153.445, "dur": 32.725, + "args": { + "External id": 246535,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917876155.374, "dur": 3.066, + "args": { + "External id": 246536,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917876159.793, "dur": 0.425, + "args": { + "External id": 246537,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917876162.086, "dur": 0.325, + "args": { + "External id": 246538,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917876165.715, "dur": 1.951, + "args": { + "External id": 246539,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917876168.716, "dur": 0.294, + "args": { + "External id": 246540,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917876170.541, "dur": 0.460, + "args": { + "External id": 246541,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917876174.572, "dur": 0.440, + "args": { + "External id": 246542,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917876176.576, "dur": 0.363, + "args": { + "External id": 246543,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917876178.302, "dur": 2.280, + "args": { + "External id": 246544,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917876218.114, "dur": 27.306, + "args": { + "External id": 246545,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917876312.565, "dur": 418.072, + "args": { + "External id": 246546,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 5377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917876345.953, "dur": 378.908, + "args": { + "External id": 246547,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5378, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917876356.751, "dur": 361.650, + "args": { + "External id": 246548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 5379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917876755.032, "dur": 2.733, + "args": { + "External id": 246549,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5380, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 4183438, "tid": 4183438, + "ts": 667917876856.886, "dur": 7348.248, + "args": { + "External id": 246550,"Record function id": 0, "Ev Idx": 5381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917876964.994, "dur": 7.488, + "args": { + "External id": 246551,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 5382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917876976.125, "dur": 1.413, + "args": { + "External id": 246552,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917876979.493, "dur": 2.699, + "args": { + "External id": 246553,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917876985.274, "dur": 1.334, + "args": { + "External id": 246554,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917876988.335, "dur": 0.876, + "args": { + "External id": 246555,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917876990.898, "dur": 0.880, + "args": { + "External id": 246556,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917876993.314, "dur": 0.791, + "args": { + "External id": 246557,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917876997.703, "dur": 3.219, + "args": { + "External id": 246558,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917877002.358, "dur": 0.923, + "args": { + "External id": 246559,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917877004.935, "dur": 0.791, + "args": { + "External id": 246560,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917877025.221, "dur": 7123.962, + "args": { + "External id": 246561,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917877042.533, "dur": 7099.761, + "args": { + "External id": 246562,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917877057.965, "dur": 15.897, + "args": { + "External id": 246563,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917877078.569, "dur": 7031.805, + "args": { + "External id": 246564,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 5395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917877081.201, "dur": 7028.648, + "args": { + "External id": 246565,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 5396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917877088.126, "dur": 6.968, + "args": { + "External id": 246566,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917877096.900, "dur": 7009.794, + "args": { + "External id": 246567,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 5398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917884350.249, "dur": 30.350, + "args": { + "External id": 246568,"Sequence number": 2987547, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 5399 + } + }, + { + "ph": "s", "id": 197, "pid": 4183438, "tid": 4183438, "ts": 667917884350.249, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917884367.613, "dur": 8.420, + "args": { + "External id": 246569,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 5400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917884371.115, "dur": 4.634, + "args": { + "External id": 246570,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 5401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917884447.332, "dur": 90.577, + "args": { + "External id": 246571,"Record function id": 0, "Ev Idx": 5402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917884539.779, "dur": 1077.971, + "args": { + "External id": 246572,"Record function id": 0, "Ev Idx": 5403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917884581.548, "dur": 1021.742, + "args": { + "External id": 246573,"Sequence number": 2987548, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 5404 + } + }, + { + "ph": "s", "id": 196, "pid": 4183438, "tid": 4183438, "ts": 667917884581.548, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917884647.394, "dur": 76.933, + "args": { + "External id": 246574,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917884740.645, "dur": 93.505, + "args": { + "External id": 246575,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917884843.886, "dur": 34.422, + "args": { + "External id": 246576,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917884887.670, "dur": 30.029, + "args": { + "External id": 246577,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917884941.732, "dur": 24.693, + "args": { + "External id": 246578,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917884984.356, "dur": 14.785, + "args": { + "External id": 246579,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917885019.159, "dur": 139.380, + "args": { + "External id": 246580,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917885068.512, "dur": 10.900, + "args": { + "External id": 246581,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 5412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917885073.642, "dur": 5.000, + "args": { + "External id": 246582,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917885082.075, "dur": 6.105, + "args": { + "External id": 246583,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917885097.724, "dur": 1.190, + "args": { + "External id": 246584,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917885101.439, "dur": 4.885, + "args": { + "External id": 246585,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917885169.752, "dur": 64.449, + "args": { + "External id": 246586,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917885268.571, "dur": 30.048, + "args": { + "External id": 246587,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917885307.582, "dur": 41.554, + "args": { + "External id": 246588,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917885358.373, "dur": 31.871, + "args": { + "External id": 246589,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917885412.016, "dur": 26.149, + "args": { + "External id": 246590,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 5421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917885444.137, "dur": 44.456, + "args": { + "External id": 246591,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 5422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917885512.229, "dur": 19.004, + "args": { + "External id": 246592,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 5423 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 4183438, "tid": 4183438, + "ts": 667917885723.394, "dur": 79.693, + "args": { + "External id": 246593,"Record function id": 0, "Ev Idx": 5424 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917885877.586, "dur": 50.395, + "args": { + "External id": 246594,"Record function id": 0, "Ev Idx": 5425 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 4183438, "tid": 4183438, + "ts": 667917885936.929, "dur": 8453.792, + "args": { + "External id": 246595,"Record function id": 0, "Ev Idx": 5426 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 4183438, "tid": 4183438, + "ts": 667917885944.534, "dur": 942.921, + "args": { + "External id": 246596,"Record function id": 0, "Ev Idx": 5427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917886026.162, "dur": 8.900, + "args": { + "External id": 246597,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917886047.883, "dur": 40.953, + "args": { + "External id": 246598,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886053.321, "dur": 2.580, + "args": { + "External id": 246599,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886062.020, "dur": 0.391, + "args": { + "External id": 246600,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886064.247, "dur": 0.531, + "args": { + "External id": 246601,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886066.324, "dur": 0.532, + "args": { + "External id": 246602,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886070.557, "dur": 0.342, + "args": { + "External id": 246603,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886072.439, "dur": 0.414, + "args": { + "External id": 246604,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886074.256, "dur": 3.256, + "args": { + "External id": 246605,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886079.100, "dur": 0.422, + "args": { + "External id": 246606,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886081.438, "dur": 0.169, + "args": { + "External id": 246607,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917886100.429, "dur": 43.404, + "args": { + "External id": 246608,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917886178.794, "dur": 139.163, + "args": { + "External id": 246609,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 5440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917886204.737, "dur": 5.572, + "args": { + "External id": 246610,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917886217.398, "dur": 11.611, + "args": { + "External id": 246611,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917886222.015, "dur": 6.578, + "args": { + "External id": 246612,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 5443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886226.185, "dur": 0.698, + "args": { + "External id": 246613,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917886236.676, "dur": 32.206, + "args": { + "External id": 246614,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886239.115, "dur": 3.475, + "args": { + "External id": 246615,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886244.007, "dur": 0.315, + "args": { + "External id": 246616,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886246.299, "dur": 0.353, + "args": { + "External id": 246617,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886251.021, "dur": 2.024, + "args": { + "External id": 246618,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886254.616, "dur": 0.194, + "args": { + "External id": 246619,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886256.120, "dur": 0.340, + "args": { + "External id": 246620,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886259.567, "dur": 0.171, + "args": { + "External id": 246621,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886260.917, "dur": 0.312, + "args": { + "External id": 246622,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917886262.275, "dur": 2.369, + "args": { + "External id": 246623,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917886282.907, "dur": 26.693, + "args": { + "External id": 246624,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917886371.135, "dur": 411.281, + "args": { + "External id": 246625,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 5456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917886403.139, "dur": 373.388, + "args": { + "External id": 246626,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5457, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917886413.863, "dur": 356.709, + "args": { + "External id": 246627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 5458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917886806.729, "dur": 2.663, + "args": { + "External id": 246628,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5459, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 4183438, "tid": 4183438, + "ts": 667917886909.930, "dur": 7263.442, + "args": { + "External id": 246629,"Record function id": 0, "Ev Idx": 5460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917887011.180, "dur": 6.855, + "args": { + "External id": 246630,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 5461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917887022.054, "dur": 1.043, + "args": { + "External id": 246631,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917887025.251, "dur": 2.312, + "args": { + "External id": 246632,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917887029.354, "dur": 1.039, + "args": { + "External id": 246633,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917887031.902, "dur": 0.958, + "args": { + "External id": 246634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917887034.470, "dur": 0.708, + "args": { + "External id": 246635,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917887038.489, "dur": 0.902, + "args": { + "External id": 246636,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917887040.899, "dur": 2.525, + "args": { + "External id": 246637,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917887045.080, "dur": 1.002, + "args": { + "External id": 246638,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917887047.421, "dur": 1.454, + "args": { + "External id": 246639,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917887080.622, "dur": 7036.171, + "args": { + "External id": 246640,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917887097.765, "dur": 7011.607, + "args": { + "External id": 246641,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917887112.317, "dur": 16.609, + "args": { + "External id": 246642,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917887131.674, "dur": 6939.432, + "args": { + "External id": 246643,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 5474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917887134.353, "dur": 6936.331, + "args": { + "External id": 246644,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 5475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917887140.868, "dur": 6.889, + "args": { + "External id": 246645,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917887149.577, "dur": 6917.696, + "args": { + "External id": 246646,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 5477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917894333.224, "dur": 30.606, + "args": { + "External id": 246647,"Sequence number": 2987549, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 5478 + } + }, + { + "ph": "s", "id": 195, "pid": 4183438, "tid": 4183438, "ts": 667917894333.224, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917894350.151, "dur": 8.752, + "args": { + "External id": 246648,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 5479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917894353.782, "dur": 4.767, + "args": { + "External id": 246649,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 5480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917894430.537, "dur": 93.578, + "args": { + "External id": 246650,"Record function id": 0, "Ev Idx": 5481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917894525.766, "dur": 1098.279, + "args": { + "External id": 246651,"Record function id": 0, "Ev Idx": 5482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917894570.259, "dur": 1039.452, + "args": { + "External id": 246652,"Sequence number": 2987550, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 5483 + } + }, + { + "ph": "s", "id": 194, "pid": 4183438, "tid": 4183438, "ts": 667917894570.259, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917894633.784, "dur": 92.325, + "args": { + "External id": 246653,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917894742.838, "dur": 95.250, + "args": { + "External id": 246654,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917894849.389, "dur": 35.348, + "args": { + "External id": 246655,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917894894.084, "dur": 29.565, + "args": { + "External id": 246656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917894949.661, "dur": 27.074, + "args": { + "External id": 246657,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917894993.444, "dur": 15.558, + "args": { + "External id": 246658,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917895028.064, "dur": 133.824, + "args": { + "External id": 246659,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917895078.572, "dur": 11.325, + "args": { + "External id": 246660,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 5491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917895083.504, "dur": 5.636, + "args": { + "External id": 246661,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917895093.168, "dur": 6.075, + "args": { + "External id": 246662,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917895100.659, "dur": 1.644, + "args": { + "External id": 246663,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917895104.806, "dur": 4.569, + "args": { + "External id": 246664,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917895172.998, "dur": 64.983, + "args": { + "External id": 246665,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917895272.159, "dur": 31.879, + "args": { + "External id": 246666,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917895315.104, "dur": 41.633, + "args": { + "External id": 246667,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917895365.302, "dur": 32.508, + "args": { + "External id": 246668,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917895420.158, "dur": 27.326, + "args": { + "External id": 246669,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 5500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917895453.098, "dur": 40.991, + "args": { + "External id": 246670,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 5501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917895514.203, "dur": 18.881, + "args": { + "External id": 246671,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 5502 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 4183438, "tid": 4183438, + "ts": 667917895728.429, "dur": 81.899, + "args": { + "External id": 246672,"Record function id": 0, "Ev Idx": 5503 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917895884.480, "dur": 49.072, + "args": { + "External id": 246673,"Record function id": 0, "Ev Idx": 5504 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 4183438, "tid": 4183438, + "ts": 667917895942.705, "dur": 8462.186, + "args": { + "External id": 246674,"Record function id": 0, "Ev Idx": 5505 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 4183438, "tid": 4183438, + "ts": 667917895950.950, "dur": 944.999, + "args": { + "External id": 246675,"Record function id": 0, "Ev Idx": 5506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917896031.756, "dur": 9.046, + "args": { + "External id": 246676,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917896054.676, "dur": 39.136, + "args": { + "External id": 246677,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896060.516, "dur": 2.194, + "args": { + "External id": 246678,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896067.368, "dur": 0.244, + "args": { + "External id": 246679,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896070.049, "dur": 0.357, + "args": { + "External id": 246680,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896071.782, "dur": 0.375, + "args": { + "External id": 246681,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896075.678, "dur": 0.451, + "args": { + "External id": 246682,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896077.561, "dur": 0.240, + "args": { + "External id": 246683,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896078.876, "dur": 4.184, + "args": { + "External id": 246684,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896084.223, "dur": 0.398, + "args": { + "External id": 246685,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896086.037, "dur": 0.303, + "args": { + "External id": 246686,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917896105.110, "dur": 44.726, + "args": { + "External id": 246687,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917896184.466, "dur": 154.822, + "args": { + "External id": 246688,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 5519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917896214.342, "dur": 5.563, + "args": { + "External id": 246689,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917896226.154, "dur": 11.360, + "args": { + "External id": 246690,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917896230.551, "dur": 6.503, + "args": { + "External id": 246691,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 5522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896234.377, "dur": 0.848, + "args": { + "External id": 246692,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917896244.198, "dur": 34.893, + "args": { + "External id": 246693,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896246.198, "dur": 2.486, + "args": { + "External id": 246694,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896252.396, "dur": 0.422, + "args": { + "External id": 246695,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896254.395, "dur": 0.511, + "args": { + "External id": 246696,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896258.560, "dur": 1.936, + "args": { + "External id": 246697,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896262.284, "dur": 0.558, + "args": { + "External id": 246698,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896264.185, "dur": 2.306, + "args": { + "External id": 246699,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896267.912, "dur": 0.163, + "args": { + "External id": 246700,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896269.141, "dur": 0.347, + "args": { + "External id": 246701,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917896273.140, "dur": 0.460, + "args": { + "External id": 246702,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917896302.066, "dur": 28.678, + "args": { + "External id": 246703,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917896393.753, "dur": 402.682, + "args": { + "External id": 246704,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 5535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917896424.388, "dur": 366.314, + "args": { + "External id": 246705,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5536, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917896437.457, "dur": 347.140, + "args": { + "External id": 246706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 5537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917896818.543, "dur": 2.426, + "args": { + "External id": 246707,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5538, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 4183438, "tid": 4183438, + "ts": 667917896917.628, "dur": 7292.879, + "args": { + "External id": 246708,"Record function id": 0, "Ev Idx": 5539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917897015.976, "dur": 6.897, + "args": { + "External id": 246709,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 5540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917897026.565, "dur": 1.278, + "args": { + "External id": 246710,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917897029.711, "dur": 2.836, + "args": { + "External id": 246711,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917897034.311, "dur": 0.997, + "args": { + "External id": 246712,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917897036.829, "dur": 1.107, + "args": { + "External id": 246713,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917897040.024, "dur": 0.933, + "args": { + "External id": 246714,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917897042.603, "dur": 0.860, + "args": { + "External id": 246715,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917897045.518, "dur": 2.643, + "args": { + "External id": 246716,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917897050.166, "dur": 0.837, + "args": { + "External id": 246717,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917897053.221, "dur": 0.650, + "args": { + "External id": 246718,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917897072.558, "dur": 7084.925, + "args": { + "External id": 246719,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917897088.342, "dur": 7062.594, + "args": { + "External id": 246720,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917897102.939, "dur": 16.239, + "args": { + "External id": 246721,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917897122.071, "dur": 6997.096, + "args": { + "External id": 246722,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 5553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917897124.607, "dur": 6994.053, + "args": { + "External id": 246723,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 5554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917897131.708, "dur": 6.726, + "args": { + "External id": 246724,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917897140.595, "dur": 6974.932, + "args": { + "External id": 246725,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 5556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917904349.234, "dur": 29.669, + "args": { + "External id": 246726,"Sequence number": 2987551, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 5557 + } + }, + { + "ph": "s", "id": 193, "pid": 4183438, "tid": 4183438, "ts": 667917904349.234, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917904365.727, "dur": 8.618, + "args": { + "External id": 246727,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 5558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917904369.563, "dur": 4.520, + "args": { + "External id": 246728,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 5559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917904441.692, "dur": 93.908, + "args": { + "External id": 246729,"Record function id": 0, "Ev Idx": 5560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917904537.271, "dur": 1069.918, + "args": { + "External id": 246730,"Record function id": 0, "Ev Idx": 5561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917904577.095, "dur": 1015.714, + "args": { + "External id": 246731,"Sequence number": 2987552, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 5562 + } + }, + { + "ph": "s", "id": 192, "pid": 4183438, "tid": 4183438, "ts": 667917904577.095, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917904644.776, "dur": 74.710, + "args": { + "External id": 246732,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917904736.646, "dur": 95.018, + "args": { + "External id": 246733,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917904842.504, "dur": 35.707, + "args": { + "External id": 246734,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917904887.350, "dur": 30.137, + "args": { + "External id": 246735,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917904942.984, "dur": 26.719, + "args": { + "External id": 246736,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917904987.195, "dur": 14.356, + "args": { + "External id": 246737,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917905020.165, "dur": 129.065, + "args": { + "External id": 246738,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917905069.468, "dur": 12.028, + "args": { + "External id": 246739,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 5570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917905074.310, "dur": 6.213, + "args": { + "External id": 246740,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917905084.237, "dur": 5.267, + "args": { + "External id": 246741,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917905090.650, "dur": 1.463, + "args": { + "External id": 246742,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917905094.506, "dur": 4.100, + "args": { + "External id": 246743,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917905160.589, "dur": 63.551, + "args": { + "External id": 246744,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917905258.537, "dur": 29.000, + "args": { + "External id": 246745,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917905307.374, "dur": 42.567, + "args": { + "External id": 246746,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917905359.303, "dur": 31.793, + "args": { + "External id": 246747,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917905412.490, "dur": 25.630, + "args": { + "External id": 246748,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 5579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917905444.026, "dur": 40.658, + "args": { + "External id": 246749,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 5580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917905504.265, "dur": 18.155, + "args": { + "External id": 246750,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 5581 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 4183438, "tid": 4183438, + "ts": 667917905729.903, "dur": 80.933, + "args": { + "External id": 246751,"Record function id": 0, "Ev Idx": 5582 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917905889.263, "dur": 50.349, + "args": { + "External id": 246752,"Record function id": 0, "Ev Idx": 5583 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 4183438, "tid": 4183438, + "ts": 667917905948.923, "dur": 8505.808, + "args": { + "External id": 246753,"Record function id": 0, "Ev Idx": 5584 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 4183438, "tid": 4183438, + "ts": 667917905957.575, "dur": 969.585, + "args": { + "External id": 246754,"Record function id": 0, "Ev Idx": 5585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917906039.530, "dur": 9.282, + "args": { + "External id": 246755,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917906061.991, "dur": 37.557, + "args": { + "External id": 246756,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906067.254, "dur": 2.186, + "args": { + "External id": 246757,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906074.164, "dur": 0.380, + "args": { + "External id": 246758,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906075.822, "dur": 0.527, + "args": { + "External id": 246759,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906077.689, "dur": 0.368, + "args": { + "External id": 246760,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906081.253, "dur": 0.799, + "args": { + "External id": 246761,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906083.247, "dur": 0.535, + "args": { + "External id": 246762,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906084.946, "dur": 4.191, + "args": { + "External id": 246763,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906090.244, "dur": 0.384, + "args": { + "External id": 246764,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906091.939, "dur": 0.480, + "args": { + "External id": 246765,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917906109.985, "dur": 43.540, + "args": { + "External id": 246766,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917906187.532, "dur": 133.972, + "args": { + "External id": 246767,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 5598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917906214.669, "dur": 5.428, + "args": { + "External id": 246768,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917906226.100, "dur": 11.166, + "args": { + "External id": 246769,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917906230.738, "dur": 6.079, + "args": { + "External id": 246770,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 5601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906234.354, "dur": 0.823, + "args": { + "External id": 246771,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917906244.099, "dur": 30.245, + "args": { + "External id": 246772,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906245.898, "dur": 2.993, + "args": { + "External id": 246773,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906250.437, "dur": 0.580, + "args": { + "External id": 246774,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906252.200, "dur": 0.292, + "args": { + "External id": 246775,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906256.046, "dur": 1.653, + "args": { + "External id": 246776,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906258.727, "dur": 0.409, + "args": { + "External id": 246777,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906260.438, "dur": 0.476, + "args": { + "External id": 246778,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906263.792, "dur": 0.162, + "args": { + "External id": 246779,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906265.195, "dur": 0.538, + "args": { + "External id": 246780,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917906266.671, "dur": 2.106, + "args": { + "External id": 246781,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917906286.376, "dur": 26.068, + "args": { + "External id": 246782,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917906376.843, "dur": 448.199, + "args": { + "External id": 246783,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 5614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917906405.704, "dur": 413.249, + "args": { + "External id": 246784,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5615, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917906442.658, "dur": 368.061, + "args": { + "External id": 246785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 5616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917906848.865, "dur": 2.242, + "args": { + "External id": 246786,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5617, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 4183438, "tid": 4183438, + "ts": 667917906948.697, "dur": 7317.561, + "args": { + "External id": 246787,"Record function id": 0, "Ev Idx": 5618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917907050.376, "dur": 7.015, + "args": { + "External id": 246788,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 5619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917907060.640, "dur": 1.089, + "args": { + "External id": 246789,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917907063.564, "dur": 3.293, + "args": { + "External id": 246790,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917907068.690, "dur": 1.087, + "args": { + "External id": 246791,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917907071.527, "dur": 1.082, + "args": { + "External id": 246792,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917907074.133, "dur": 0.899, + "args": { + "External id": 246793,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917907077.017, "dur": 0.937, + "args": { + "External id": 246794,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917907079.996, "dur": 2.495, + "args": { + "External id": 246795,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917907084.118, "dur": 0.994, + "args": { + "External id": 246796,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917907086.991, "dur": 0.563, + "args": { + "External id": 246797,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917907106.863, "dur": 7121.924, + "args": { + "External id": 246798,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917907123.955, "dur": 7097.961, + "args": { + "External id": 246799,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917907138.301, "dur": 16.244, + "args": { + "External id": 246800,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917907157.465, "dur": 7021.022, + "args": { + "External id": 246801,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 5632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917907159.953, "dur": 7017.925, + "args": { + "External id": 246802,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 5633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917907167.064, "dur": 6.999, + "args": { + "External id": 246803,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917907176.130, "dur": 6998.530, + "args": { + "External id": 246804,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 5635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917914398.311, "dur": 32.331, + "args": { + "External id": 246805,"Sequence number": 2987553, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 5636 + } + }, + { + "ph": "s", "id": 191, "pid": 4183438, "tid": 4183438, "ts": 667917914398.311, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917914418.614, "dur": 7.642, + "args": { + "External id": 246806,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 5637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917914421.790, "dur": 4.226, + "args": { + "External id": 246807,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 5638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917914492.831, "dur": 92.660, + "args": { + "External id": 246808,"Record function id": 0, "Ev Idx": 5639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917914587.043, "dur": 1123.404, + "args": { + "External id": 246809,"Record function id": 0, "Ev Idx": 5640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917914627.829, "dur": 1066.601, + "args": { + "External id": 246810,"Sequence number": 2987554, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 5641 + } + }, + { + "ph": "s", "id": 190, "pid": 4183438, "tid": 4183438, "ts": 667917914627.829, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917914741.746, "dur": 47.954, + "args": { + "External id": 246811,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917914803.463, "dur": 92.851, + "args": { + "External id": 246812,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917914907.228, "dur": 35.671, + "args": { + "External id": 246813,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917914951.858, "dur": 30.397, + "args": { + "External id": 246814,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917915007.440, "dur": 24.222, + "args": { + "External id": 246815,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917915049.456, "dur": 14.170, + "args": { + "External id": 246816,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917915083.701, "dur": 149.632, + "args": { + "External id": 246817,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917915136.144, "dur": 11.572, + "args": { + "External id": 246818,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 5649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917915141.770, "dur": 5.331, + "args": { + "External id": 246819,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917915150.707, "dur": 5.220, + "args": { + "External id": 246820,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917915157.626, "dur": 1.410, + "args": { + "External id": 246821,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917915161.359, "dur": 4.242, + "args": { + "External id": 246822,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917915245.604, "dur": 54.215, + "args": { + "External id": 246823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917915330.166, "dur": 28.154, + "args": { + "External id": 246824,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917915368.284, "dur": 38.675, + "args": { + "External id": 246825,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917915415.941, "dur": 32.734, + "args": { + "External id": 246826,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917915469.733, "dur": 26.038, + "args": { + "External id": 246827,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 5658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917915501.470, "dur": 45.679, + "args": { + "External id": 246828,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 5659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917915565.515, "dur": 18.540, + "args": { + "External id": 246829,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 5660 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 4183438, "tid": 4183438, + "ts": 667917915779.309, "dur": 83.452, + "args": { + "External id": 246830,"Record function id": 0, "Ev Idx": 5661 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917915937.779, "dur": 49.700, + "args": { + "External id": 246831,"Record function id": 0, "Ev Idx": 5662 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 4183438, "tid": 4183438, + "ts": 667917915995.958, "dur": 8502.324, + "args": { + "External id": 246832,"Record function id": 0, "Ev Idx": 5663 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 4183438, "tid": 4183438, + "ts": 667917916005.122, "dur": 1004.209, + "args": { + "External id": 246833,"Record function id": 0, "Ev Idx": 5664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917916087.348, "dur": 9.729, + "args": { + "External id": 246834,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917916110.062, "dur": 41.937, + "args": { + "External id": 246835,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916115.856, "dur": 2.297, + "args": { + "External id": 246836,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916122.913, "dur": 0.244, + "args": { + "External id": 246837,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916124.873, "dur": 0.529, + "args": { + "External id": 246838,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916127.035, "dur": 0.203, + "args": { + "External id": 246839,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916131.896, "dur": 0.386, + "args": { + "External id": 246840,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916133.941, "dur": 0.554, + "args": { + "External id": 246841,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916135.955, "dur": 4.354, + "args": { + "External id": 246842,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916142.155, "dur": 0.421, + "args": { + "External id": 246843,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916144.095, "dur": 0.471, + "args": { + "External id": 246844,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917916166.041, "dur": 59.692, + "args": { + "External id": 246845,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917916263.221, "dur": 125.329, + "args": { + "External id": 246846,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 5677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917916275.153, "dur": 7.342, + "args": { + "External id": 246847,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917916288.213, "dur": 12.086, + "args": { + "External id": 246848,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917916293.106, "dur": 6.783, + "args": { + "External id": 246849,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 5680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916297.316, "dur": 0.884, + "args": { + "External id": 246850,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917916307.673, "dur": 32.278, + "args": { + "External id": 246851,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916310.354, "dur": 0.521, + "args": { + "External id": 246852,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916312.803, "dur": 2.678, + "args": { + "External id": 246853,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916316.984, "dur": 0.440, + "args": { + "External id": 246854,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916319.211, "dur": 1.891, + "args": { + "External id": 246855,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916324.713, "dur": 0.235, + "args": { + "External id": 246856,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916326.668, "dur": 0.656, + "args": { + "External id": 246857,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916329.011, "dur": 0.515, + "args": { + "External id": 246858,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916332.504, "dur": 0.177, + "args": { + "External id": 246859,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917916334.431, "dur": 0.362, + "args": { + "External id": 246860,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917916352.586, "dur": 27.949, + "args": { + "External id": 246861,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917916443.165, "dur": 459.109, + "args": { + "External id": 246862,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 5693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917916470.905, "dur": 425.656, + "args": { + "External id": 246863,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5694, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917916481.606, "dur": 408.865, + "args": { + "External id": 246864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 5695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917916926.358, "dur": 2.485, + "args": { + "External id": 246865,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5696, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 4183438, "tid": 4183438, + "ts": 667917917030.874, "dur": 7268.910, + "args": { + "External id": 246866,"Record function id": 0, "Ev Idx": 5697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917917136.229, "dur": 7.054, + "args": { + "External id": 246867,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 5698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917917146.619, "dur": 1.095, + "args": { + "External id": 246868,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917917149.389, "dur": 2.748, + "args": { + "External id": 246869,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917917153.359, "dur": 0.940, + "args": { + "External id": 246870,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917917155.809, "dur": 1.181, + "args": { + "External id": 246871,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917917158.473, "dur": 0.795, + "args": { + "External id": 246872,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917917161.233, "dur": 1.021, + "args": { + "External id": 246873,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917917164.057, "dur": 2.461, + "args": { + "External id": 246874,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917917168.533, "dur": 0.622, + "args": { + "External id": 246875,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917917170.792, "dur": 0.910, + "args": { + "External id": 246876,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917917190.818, "dur": 7070.195, + "args": { + "External id": 246877,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917917226.108, "dur": 7028.225, + "args": { + "External id": 246878,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917917242.761, "dur": 15.542, + "args": { + "External id": 246879,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917917261.027, "dur": 6961.488, + "args": { + "External id": 246880,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 5711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917917263.901, "dur": 6958.098, + "args": { + "External id": 246881,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 5712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917917270.312, "dur": 6.345, + "args": { + "External id": 246882,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917917278.376, "dur": 6940.526, + "args": { + "External id": 246883,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 5714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917924436.419, "dur": 36.142, + "args": { + "External id": 246884,"Sequence number": 2987555, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 5715 + } + }, + { + "ph": "s", "id": 189, "pid": 4183438, "tid": 4183438, "ts": 667917924436.419, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917924459.656, "dur": 7.674, + "args": { + "External id": 246885,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 5716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917924462.842, "dur": 4.236, + "args": { + "External id": 246886,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 5717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917924538.133, "dur": 92.723, + "args": { + "External id": 246887,"Record function id": 0, "Ev Idx": 5718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917924632.678, "dur": 1111.164, + "args": { + "External id": 246888,"Record function id": 0, "Ev Idx": 5719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917924706.152, "dur": 1021.675, + "args": { + "External id": 246889,"Sequence number": 2987556, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 5720 + } + }, + { + "ph": "s", "id": 188, "pid": 4183438, "tid": 4183438, "ts": 667917924706.152, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917924773.770, "dur": 46.287, + "args": { + "External id": 246890,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917924833.735, "dur": 93.962, + "args": { + "External id": 246891,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917924937.782, "dur": 35.634, + "args": { + "External id": 246892,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917924982.646, "dur": 29.958, + "args": { + "External id": 246893,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917925037.078, "dur": 24.497, + "args": { + "External id": 246894,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917925078.870, "dur": 14.142, + "args": { + "External id": 246895,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917925112.955, "dur": 151.835, + "args": { + "External id": 246896,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917925165.271, "dur": 11.376, + "args": { + "External id": 246897,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 5728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917925170.061, "dur": 5.921, + "args": { + "External id": 246898,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917925179.635, "dur": 5.919, + "args": { + "External id": 246899,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917925186.806, "dur": 1.333, + "args": { + "External id": 246900,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917925190.606, "dur": 20.659, + "args": { + "External id": 246901,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917925276.982, "dur": 54.259, + "args": { + "External id": 246902,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917925361.653, "dur": 28.180, + "args": { + "External id": 246903,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917925398.921, "dur": 40.700, + "args": { + "External id": 246904,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917925448.684, "dur": 32.760, + "args": { + "External id": 246905,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917925503.217, "dur": 26.082, + "args": { + "External id": 246906,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 5737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917925535.050, "dur": 41.760, + "args": { + "External id": 246907,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 5738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917925596.751, "dur": 18.657, + "args": { + "External id": 246908,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 5739 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 4183438, "tid": 4183438, + "ts": 667917925811.709, "dur": 80.462, + "args": { + "External id": 246909,"Record function id": 0, "Ev Idx": 5740 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917925965.155, "dur": 47.442, + "args": { + "External id": 246910,"Record function id": 0, "Ev Idx": 5741 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 4183438, "tid": 4183438, + "ts": 667917926021.597, "dur": 8570.308, + "args": { + "External id": 246911,"Record function id": 0, "Ev Idx": 5742 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 4183438, "tid": 4183438, + "ts": 667917926030.745, "dur": 945.203, + "args": { + "External id": 246912,"Record function id": 0, "Ev Idx": 5743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917926111.067, "dur": 9.919, + "args": { + "External id": 246913,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917926137.496, "dur": 41.633, + "args": { + "External id": 246914,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926143.443, "dur": 2.085, + "args": { + "External id": 246915,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926149.870, "dur": 0.512, + "args": { + "External id": 246916,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926152.134, "dur": 0.687, + "args": { + "External id": 246917,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926154.641, "dur": 2.550, + "args": { + "External id": 246918,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926158.791, "dur": 0.641, + "args": { + "External id": 246919,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926160.812, "dur": 0.589, + "args": { + "External id": 246920,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926165.842, "dur": 1.537, + "args": { + "External id": 246921,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926168.894, "dur": 0.203, + "args": { + "External id": 246922,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926170.699, "dur": 0.442, + "args": { + "External id": 246923,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917926190.510, "dur": 57.032, + "args": { + "External id": 246924,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917926285.040, "dur": 121.213, + "args": { + "External id": 246925,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 5756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917926296.498, "dur": 5.102, + "args": { + "External id": 246926,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917926307.012, "dur": 13.247, + "args": { + "External id": 246927,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917926311.636, "dur": 8.153, + "args": { + "External id": 246928,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 5759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926315.391, "dur": 2.793, + "args": { + "External id": 246929,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917926327.619, "dur": 32.061, + "args": { + "External id": 246930,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926330.525, "dur": 0.566, + "args": { + "External id": 246931,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926332.726, "dur": 0.526, + "args": { + "External id": 246932,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926334.936, "dur": 0.354, + "args": { + "External id": 246933,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926338.684, "dur": 1.996, + "args": { + "External id": 246934,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926342.395, "dur": 0.354, + "args": { + "External id": 246935,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926344.191, "dur": 2.745, + "args": { + "External id": 246936,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926348.466, "dur": 0.209, + "args": { + "External id": 246937,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926350.042, "dur": 0.404, + "args": { + "External id": 246938,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917926354.493, "dur": 0.253, + "args": { + "External id": 246939,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917926372.045, "dur": 25.979, + "args": { + "External id": 246940,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917926461.316, "dur": 414.345, + "args": { + "External id": 246941,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 5772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917926489.012, "dur": 380.695, + "args": { + "External id": 246942,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5773, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917926499.766, "dur": 363.204, + "args": { + "External id": 246943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 5774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917926898.551, "dur": 2.643, + "args": { + "External id": 246944,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5775, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 4183438, "tid": 4183438, + "ts": 667917926999.134, "dur": 7396.429, + "args": { + "External id": 246945,"Record function id": 0, "Ev Idx": 5776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917927100.752, "dur": 6.872, + "args": { + "External id": 246946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 5777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917927111.443, "dur": 0.955, + "args": { + "External id": 246947,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917927114.044, "dur": 2.322, + "args": { + "External id": 246948,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917927117.964, "dur": 1.112, + "args": { + "External id": 246949,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917927120.416, "dur": 0.769, + "args": { + "External id": 246950,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917927123.100, "dur": 1.001, + "args": { + "External id": 246951,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917927125.971, "dur": 1.088, + "args": { + "External id": 246952,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917927129.002, "dur": 2.500, + "args": { + "External id": 246953,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917927133.304, "dur": 0.859, + "args": { + "External id": 246954,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917927135.909, "dur": 0.650, + "args": { + "External id": 246955,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917927156.317, "dur": 7199.512, + "args": { + "External id": 246956,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917927172.394, "dur": 7176.195, + "args": { + "External id": 246957,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917927208.283, "dur": 17.374, + "args": { + "External id": 246958,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917927229.403, "dur": 7087.610, + "args": { + "External id": 246959,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 5790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917927232.369, "dur": 7083.973, + "args": { + "External id": 246960,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 5791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917927239.170, "dur": 7.759, + "args": { + "External id": 246961,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917927248.905, "dur": 7064.157, + "args": { + "External id": 246962,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 5793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917934531.584, "dur": 34.298, + "args": { + "External id": 246963,"Sequence number": 2987557, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 5794 + } + }, + { + "ph": "s", "id": 187, "pid": 4183438, "tid": 4183438, "ts": 667917934531.584, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917934553.621, "dur": 7.689, + "args": { + "External id": 246964,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 5795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917934556.799, "dur": 4.222, + "args": { + "External id": 246965,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 5796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917934631.912, "dur": 119.723, + "args": { + "External id": 246966,"Record function id": 0, "Ev Idx": 5797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917934754.876, "dur": 1091.645, + "args": { + "External id": 246967,"Record function id": 0, "Ev Idx": 5798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917934799.666, "dur": 1032.513, + "args": { + "External id": 246968,"Sequence number": 2987558, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 5799 + } + }, + { + "ph": "s", "id": 186, "pid": 4183438, "tid": 4183438, "ts": 667917934799.666, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917934869.496, "dur": 45.578, + "args": { + "External id": 246969,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917934929.114, "dur": 93.164, + "args": { + "External id": 246970,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917935032.092, "dur": 35.272, + "args": { + "External id": 246971,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917935075.769, "dur": 30.455, + "args": { + "External id": 246972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917935129.992, "dur": 24.747, + "args": { + "External id": 246973,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917935172.202, "dur": 13.820, + "args": { + "External id": 246974,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917935219.806, "dur": 136.051, + "args": { + "External id": 246975,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917935271.970, "dur": 13.067, + "args": { + "External id": 246976,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 5807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917935277.602, "dur": 6.395, + "args": { + "External id": 246977,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917935288.120, "dur": 6.456, + "args": { + "External id": 246978,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917935295.990, "dur": 1.300, + "args": { + "External id": 246979,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917935299.626, "dur": 4.113, + "args": { + "External id": 246980,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917935367.611, "dur": 49.930, + "args": { + "External id": 246981,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917935447.660, "dur": 28.926, + "args": { + "External id": 246982,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917935485.777, "dur": 39.388, + "args": { + "External id": 246983,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917935534.542, "dur": 32.786, + "args": { + "External id": 246984,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917935597.253, "dur": 26.775, + "args": { + "External id": 246985,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 5816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917935632.077, "dur": 88.077, + "args": { + "External id": 246986,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 5817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917935743.071, "dur": 19.992, + "args": { + "External id": 246987,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 5818 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 4183438, "tid": 4183438, + "ts": 667917935912.533, "dur": 82.927, + "args": { + "External id": 246988,"Record function id": 0, "Ev Idx": 5819 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917936068.159, "dur": 48.261, + "args": { + "External id": 246989,"Record function id": 0, "Ev Idx": 5820 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 4183438, "tid": 4183438, + "ts": 667917936125.732, "dur": 8570.731, + "args": { + "External id": 246990,"Record function id": 0, "Ev Idx": 5821 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 4183438, "tid": 4183438, + "ts": 667917936134.401, "dur": 938.159, + "args": { + "External id": 246991,"Record function id": 0, "Ev Idx": 5822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917936232.408, "dur": 10.170, + "args": { + "External id": 246992,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917936256.573, "dur": 41.353, + "args": { + "External id": 246993,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936262.684, "dur": 2.381, + "args": { + "External id": 246994,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936270.367, "dur": 0.249, + "args": { + "External id": 246995,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936272.256, "dur": 0.693, + "args": { + "External id": 246996,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936274.443, "dur": 0.612, + "args": { + "External id": 246997,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936278.865, "dur": 0.357, + "args": { + "External id": 246998,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936280.834, "dur": 0.485, + "args": { + "External id": 246999,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936283.022, "dur": 2.792, + "args": { + "External id": 247000,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936287.422, "dur": 0.558, + "args": { + "External id": 247001,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936289.444, "dur": 0.897, + "args": { + "External id": 247002,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917936309.430, "dur": 42.812, + "args": { + "External id": 247003,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917936388.531, "dur": 120.274, + "args": { + "External id": 247004,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 5835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917936399.392, "dur": 4.822, + "args": { + "External id": 247005,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917936409.690, "dur": 10.835, + "args": { + "External id": 247006,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917936414.437, "dur": 5.682, + "args": { + "External id": 247007,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 5838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936418.136, "dur": 0.870, + "args": { + "External id": 247008,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917936427.435, "dur": 34.548, + "args": { + "External id": 247009,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936430.433, "dur": 3.139, + "args": { + "External id": 247010,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936435.758, "dur": 0.569, + "args": { + "External id": 247011,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936437.717, "dur": 0.752, + "args": { + "External id": 247012,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936442.290, "dur": 0.348, + "args": { + "External id": 247013,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936444.371, "dur": 0.356, + "args": { + "External id": 247014,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936446.377, "dur": 0.654, + "args": { + "External id": 247015,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936450.427, "dur": 0.680, + "args": { + "External id": 247016,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936452.578, "dur": 0.453, + "args": { + "External id": 247017,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917936454.425, "dur": 2.609, + "args": { + "External id": 247018,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917936476.144, "dur": 24.057, + "args": { + "External id": 247019,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917936564.319, "dur": 412.590, + "args": { + "External id": 247020,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 5851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917936591.435, "dur": 380.028, + "args": { + "External id": 247021,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5852, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917936601.895, "dur": 363.377, + "args": { + "External id": 247022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 5853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917936997.799, "dur": 2.848, + "args": { + "External id": 247023,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5854, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 4183438, "tid": 4183438, + "ts": 667917937093.700, "dur": 7380.292, + "args": { + "External id": 247024,"Record function id": 0, "Ev Idx": 5855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917937209.879, "dur": 7.703, + "args": { + "External id": 247025,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 5856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917937222.905, "dur": 1.469, + "args": { + "External id": 247026,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917937225.954, "dur": 1.178, + "args": { + "External id": 247027,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917937228.630, "dur": 1.146, + "args": { + "External id": 247028,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917937231.172, "dur": 0.998, + "args": { + "External id": 247029,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917937233.659, "dur": 1.362, + "args": { + "External id": 247030,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917937236.921, "dur": 1.211, + "args": { + "External id": 247031,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917937239.562, "dur": 2.445, + "args": { + "External id": 247032,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917937243.587, "dur": 1.100, + "args": { + "External id": 247033,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917937246.171, "dur": 1.027, + "args": { + "External id": 247034,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917937266.646, "dur": 7168.455, + "args": { + "External id": 247035,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917937283.442, "dur": 7144.945, + "args": { + "External id": 247036,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917937305.518, "dur": 13.622, + "args": { + "External id": 247037,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917937321.958, "dur": 7075.364, + "args": { + "External id": 247038,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 5869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917937324.464, "dur": 7072.303, + "args": { + "External id": 247039,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 5870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917937330.711, "dur": 8.482, + "args": { + "External id": 247040,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917937341.177, "dur": 7052.537, + "args": { + "External id": 247041,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 5872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917944611.959, "dur": 32.948, + "args": { + "External id": 247042,"Sequence number": 2987559, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 5873 + } + }, + { + "ph": "s", "id": 185, "pid": 4183438, "tid": 4183438, "ts": 667917944611.959, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917944632.218, "dur": 7.829, + "args": { + "External id": 247043,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 5874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917944635.358, "dur": 4.412, + "args": { + "External id": 247044,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 5875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917944736.575, "dur": 92.446, + "args": { + "External id": 247045,"Record function id": 0, "Ev Idx": 5876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917944831.005, "dur": 1087.156, + "args": { + "External id": 247046,"Record function id": 0, "Ev Idx": 5877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917944873.498, "dur": 1030.532, + "args": { + "External id": 247047,"Sequence number": 2987560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 5878 + } + }, + { + "ph": "s", "id": 184, "pid": 4183438, "tid": 4183438, "ts": 667917944873.498, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917944940.752, "dur": 46.247, + "args": { + "External id": 247048,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917945000.266, "dur": 93.147, + "args": { + "External id": 247049,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917945101.970, "dur": 34.817, + "args": { + "External id": 247050,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917945145.367, "dur": 29.105, + "args": { + "External id": 247051,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917945214.270, "dur": 27.937, + "args": { + "External id": 247052,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917945261.978, "dur": 14.584, + "args": { + "External id": 247053,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917945294.310, "dur": 134.194, + "args": { + "External id": 247054,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917945346.806, "dur": 13.233, + "args": { + "External id": 247055,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 5886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917945352.378, "dur": 6.793, + "args": { + "External id": 247056,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917945362.948, "dur": 6.343, + "args": { + "External id": 247057,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917945370.912, "dur": 1.865, + "args": { + "External id": 247058,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917945375.245, "dur": 2.824, + "args": { + "External id": 247059,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917945439.885, "dur": 48.420, + "args": { + "External id": 247060,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917945518.872, "dur": 27.959, + "args": { + "External id": 247061,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917945554.426, "dur": 38.970, + "args": { + "External id": 247062,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917945602.984, "dur": 32.189, + "args": { + "External id": 247063,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917945695.639, "dur": 32.936, + "args": { + "External id": 247064,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 5895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917945736.149, "dur": 50.441, + "args": { + "External id": 247065,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 5896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917945809.512, "dur": 19.358, + "args": { + "External id": 247066,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 5897 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 4183438, "tid": 4183438, + "ts": 667917945985.291, "dur": 81.195, + "args": { + "External id": 247067,"Record function id": 0, "Ev Idx": 5898 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917946140.760, "dur": 49.498, + "args": { + "External id": 247068,"Record function id": 0, "Ev Idx": 5899 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 4183438, "tid": 4183438, + "ts": 667917946216.942, "dur": 8473.464, + "args": { + "External id": 247069,"Record function id": 0, "Ev Idx": 5900 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 4183438, "tid": 4183438, + "ts": 667917946227.051, "dur": 913.756, + "args": { + "External id": 247070,"Record function id": 0, "Ev Idx": 5901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917946312.016, "dur": 10.301, + "args": { + "External id": 247071,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917946335.534, "dur": 45.883, + "args": { + "External id": 247072,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946345.183, "dur": 2.544, + "args": { + "External id": 247073,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946352.218, "dur": 0.554, + "args": { + "External id": 247074,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946353.867, "dur": 0.372, + "args": { + "External id": 247075,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946355.267, "dur": 3.252, + "args": { + "External id": 247076,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946359.704, "dur": 0.475, + "args": { + "External id": 247077,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946362.807, "dur": 0.441, + "args": { + "External id": 247078,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946367.463, "dur": 1.015, + "args": { + "External id": 247079,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946369.654, "dur": 0.816, + "args": { + "External id": 247080,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946371.717, "dur": 2.474, + "args": { + "External id": 247081,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917946393.412, "dur": 42.382, + "args": { + "External id": 247082,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917946470.337, "dur": 110.576, + "args": { + "External id": 247083,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 5914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917946481.224, "dur": 4.639, + "args": { + "External id": 247084,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917946491.705, "dur": 10.372, + "args": { + "External id": 247085,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917946496.110, "dur": 5.551, + "args": { + "External id": 247086,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 5917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946499.648, "dur": 0.775, + "args": { + "External id": 247087,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917946508.452, "dur": 31.481, + "args": { + "External id": 247088,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946510.046, "dur": 0.932, + "args": { + "External id": 247089,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946512.107, "dur": 0.727, + "args": { + "External id": 247090,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946514.529, "dur": 2.627, + "args": { + "External id": 247091,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946518.172, "dur": 0.619, + "args": { + "External id": 247092,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946519.993, "dur": 1.026, + "args": { + "External id": 247093,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946524.577, "dur": 0.392, + "args": { + "External id": 247094,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946526.354, "dur": 0.792, + "args": { + "External id": 247095,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946528.224, "dur": 0.887, + "args": { + "External id": 247096,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917946533.868, "dur": 0.567, + "args": { + "External id": 247097,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917946551.137, "dur": 20.803, + "args": { + "External id": 247098,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917946634.761, "dur": 408.551, + "args": { + "External id": 247099,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 5930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917946699.112, "dur": 339.149, + "args": { + "External id": 247100,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5931, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917946711.542, "dur": 321.663, + "args": { + "External id": 247101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 5932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917947065.419, "dur": 2.414, + "args": { + "External id": 247102,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5933, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 4183438, "tid": 4183438, + "ts": 667917947161.639, "dur": 7301.357, + "args": { + "External id": 247103,"Record function id": 0, "Ev Idx": 5934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917947287.055, "dur": 7.481, + "args": { + "External id": 247104,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 5935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917947298.364, "dur": 1.240, + "args": { + "External id": 247105,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917947301.129, "dur": 1.022, + "args": { + "External id": 247106,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917947303.727, "dur": 0.896, + "args": { + "External id": 247107,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917947306.313, "dur": 1.036, + "args": { + "External id": 247108,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917947308.887, "dur": 1.346, + "args": { + "External id": 247109,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 5940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917947311.989, "dur": 1.431, + "args": { + "External id": 247110,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 5941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917947315.247, "dur": 2.335, + "args": { + "External id": 247111,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917947319.368, "dur": 1.019, + "args": { + "External id": 247112,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917947322.041, "dur": 0.950, + "args": { + "External id": 247113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 5944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917947342.553, "dur": 7080.826, + "args": { + "External id": 247114,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917947358.978, "dur": 7056.659, + "args": { + "External id": 247115,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 5946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917947374.437, "dur": 15.410, + "args": { + "External id": 247116,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917947392.459, "dur": 6989.363, + "args": { + "External id": 247117,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 5948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917947395.033, "dur": 6986.300, + "args": { + "External id": 247118,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 5949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917947401.492, "dur": 7.344, + "args": { + "External id": 247119,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917947410.870, "dur": 6966.839, + "args": { + "External id": 247120,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 5951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917954607.036, "dur": 29.684, + "args": { + "External id": 247121,"Sequence number": 2987561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 5952 + } + }, + { + "ph": "s", "id": 183, "pid": 4183438, "tid": 4183438, "ts": 667917954607.036, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917954623.710, "dur": 8.106, + "args": { + "External id": 247122,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 5953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917954626.854, "dur": 4.682, + "args": { + "External id": 247123,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 5954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917954732.471, "dur": 96.059, + "args": { + "External id": 247124,"Record function id": 0, "Ev Idx": 5955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917954830.866, "dur": 1112.113, + "args": { + "External id": 247125,"Record function id": 0, "Ev Idx": 5956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917954873.337, "dur": 1054.596, + "args": { + "External id": 247126,"Sequence number": 2987562, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 5957 + } + }, + { + "ph": "s", "id": 182, "pid": 4183438, "tid": 4183438, "ts": 667917954873.337, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917954943.669, "dur": 46.989, + "args": { + "External id": 247127,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917955004.473, "dur": 95.499, + "args": { + "External id": 247128,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917955109.066, "dur": 35.719, + "args": { + "External id": 247129,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917955154.101, "dur": 30.322, + "args": { + "External id": 247130,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917955226.043, "dur": 29.587, + "args": { + "External id": 247131,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917955275.639, "dur": 16.079, + "args": { + "External id": 247132,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917955310.152, "dur": 141.103, + "args": { + "External id": 247133,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917955367.936, "dur": 12.088, + "args": { + "External id": 247134,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 5965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917955372.790, "dur": 6.456, + "args": { + "External id": 247135,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917955382.734, "dur": 6.448, + "args": { + "External id": 247136,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917955390.256, "dur": 1.630, + "args": { + "External id": 247137,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917955394.326, "dur": 3.493, + "args": { + "External id": 247138,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917955462.905, "dur": 49.731, + "args": { + "External id": 247139,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 5970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917955543.922, "dur": 29.120, + "args": { + "External id": 247140,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917955581.080, "dur": 39.187, + "args": { + "External id": 247141,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917955629.351, "dur": 67.399, + "args": { + "External id": 247142,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 5973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917955724.590, "dur": 27.068, + "args": { + "External id": 247143,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 5974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917955758.759, "dur": 57.583, + "args": { + "External id": 247144,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 5975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917955837.024, "dur": 19.334, + "args": { + "External id": 247145,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 5976 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 4183438, "tid": 4183438, + "ts": 667917956008.637, "dur": 78.608, + "args": { + "External id": 247146,"Record function id": 0, "Ev Idx": 5977 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917956158.978, "dur": 63.055, + "args": { + "External id": 247147,"Record function id": 0, "Ev Idx": 5978 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 4183438, "tid": 4183438, + "ts": 667917956232.743, "dur": 8632.558, + "args": { + "External id": 247148,"Record function id": 0, "Ev Idx": 5979 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 4183438, "tid": 4183438, + "ts": 667917956241.502, "dur": 909.434, + "args": { + "External id": 247149,"Record function id": 0, "Ev Idx": 5980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917956324.123, "dur": 10.207, + "args": { + "External id": 247150,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917956348.342, "dur": 38.163, + "args": { + "External id": 247151,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956353.816, "dur": 2.451, + "args": { + "External id": 247152,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956361.013, "dur": 0.600, + "args": { + "External id": 247153,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956362.774, "dur": 0.895, + "args": { + "External id": 247154,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956365.161, "dur": 0.589, + "args": { + "External id": 247155,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956369.099, "dur": 0.565, + "args": { + "External id": 247156,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956371.219, "dur": 0.483, + "args": { + "External id": 247157,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956372.449, "dur": 2.764, + "args": { + "External id": 247158,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956376.714, "dur": 0.593, + "args": { + "External id": 247159,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956378.133, "dur": 0.724, + "args": { + "External id": 247160,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917956398.504, "dur": 42.534, + "args": { + "External id": 247161,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 5992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917956474.447, "dur": 119.958, + "args": { + "External id": 247162,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 5993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917956484.787, "dur": 4.422, + "args": { + "External id": 247163,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917956494.537, "dur": 10.672, + "args": { + "External id": 247164,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917956499.224, "dur": 5.579, + "args": { + "External id": 247165,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 5996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956502.566, "dur": 1.043, + "args": { + "External id": 247166,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 5997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917956512.201, "dur": 30.246, + "args": { + "External id": 247167,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 5998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956513.851, "dur": 2.698, + "args": { + "External id": 247168,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 5999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956517.989, "dur": 0.646, + "args": { + "External id": 247169,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956519.556, "dur": 0.697, + "args": { + "External id": 247170,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956524.716, "dur": 0.396, + "args": { + "External id": 247171,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956526.217, "dur": 0.762, + "args": { + "External id": 247172,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956528.369, "dur": 0.482, + "args": { + "External id": 247173,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956531.459, "dur": 0.497, + "args": { + "External id": 247174,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956533.364, "dur": 0.724, + "args": { + "External id": 247175,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917956535.063, "dur": 2.522, + "args": { + "External id": 247176,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917956560.447, "dur": 23.764, + "args": { + "External id": 247177,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 6008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917956647.052, "dur": 406.956, + "args": { + "External id": 247178,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 6009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917956711.104, "dur": 337.643, + "args": { + "External id": 247179,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6010, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917956724.257, "dur": 319.092, + "args": { + "External id": 247180,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 6011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917957073.345, "dur": 2.418, + "args": { + "External id": 247181,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6012, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 4183438, "tid": 4183438, + "ts": 667917957172.441, "dur": 7464.393, + "args": { + "External id": 247182,"Record function id": 0, "Ev Idx": 6013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917957291.804, "dur": 7.434, + "args": { + "External id": 247183,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 6014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917957303.155, "dur": 1.383, + "args": { + "External id": 247184,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 6015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917957306.396, "dur": 1.622, + "args": { + "External id": 247185,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917957309.703, "dur": 1.114, + "args": { + "External id": 247186,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917957312.625, "dur": 1.108, + "args": { + "External id": 247187,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917957315.177, "dur": 1.390, + "args": { + "External id": 247188,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917957318.700, "dur": 1.272, + "args": { + "External id": 247189,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 6020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917957321.650, "dur": 2.424, + "args": { + "External id": 247190,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 6021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917957326.137, "dur": 0.878, + "args": { + "External id": 247191,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 6022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917957328.873, "dur": 1.049, + "args": { + "External id": 247192,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 6023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917957348.537, "dur": 7248.875, + "args": { + "External id": 247193,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 6024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917957365.075, "dur": 7225.332, + "args": { + "External id": 247194,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 6025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917957380.173, "dur": 14.071, + "args": { + "External id": 247195,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917957397.359, "dur": 7160.649, + "args": { + "External id": 247196,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 6027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917957399.920, "dur": 7157.622, + "args": { + "External id": 247197,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 6028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917957406.522, "dur": 7.478, + "args": { + "External id": 247198,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917957415.929, "dur": 7138.626, + "args": { + "External id": 247199,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 6030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917964806.721, "dur": 31.484, + "args": { + "External id": 247200,"Sequence number": 2987563, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 6031 + } + }, + { + "ph": "s", "id": 181, "pid": 4183438, "tid": 4183438, "ts": 667917964806.721, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917964825.304, "dur": 8.065, + "args": { + "External id": 247201,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 6032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917964828.372, "dur": 4.632, + "args": { + "External id": 247202,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917964905.501, "dur": 93.171, + "args": { + "External id": 247203,"Record function id": 0, "Ev Idx": 6034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917965000.220, "dur": 1110.329, + "args": { + "External id": 247204,"Record function id": 0, "Ev Idx": 6035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917965042.879, "dur": 1053.229, + "args": { + "External id": 247205,"Sequence number": 2987564, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 6036 + } + }, + { + "ph": "s", "id": 180, "pid": 4183438, "tid": 4183438, "ts": 667917965042.879, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917965108.780, "dur": 43.583, + "args": { + "External id": 247206,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917965166.227, "dur": 110.312, + "args": { + "External id": 247207,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917965288.394, "dur": 37.505, + "args": { + "External id": 247208,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917965335.777, "dur": 29.838, + "args": { + "External id": 247209,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917965391.156, "dur": 25.864, + "args": { + "External id": 247210,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917965434.950, "dur": 17.324, + "args": { + "External id": 247211,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917965469.494, "dur": 134.359, + "args": { + "External id": 247212,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917965522.006, "dur": 11.581, + "args": { + "External id": 247213,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917965526.747, "dur": 5.969, + "args": { + "External id": 247214,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917965536.293, "dur": 6.400, + "args": { + "External id": 247215,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917965544.015, "dur": 2.100, + "args": { + "External id": 247216,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917965548.461, "dur": 2.909, + "args": { + "External id": 247217,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917965615.577, "dur": 79.484, + "args": { + "External id": 247218,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917965731.302, "dur": 34.653, + "args": { + "External id": 247219,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917965774.609, "dur": 43.356, + "args": { + "External id": 247220,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 6051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917965827.300, "dur": 45.262, + "args": { + "External id": 247221,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 6052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917965895.651, "dur": 27.676, + "args": { + "External id": 247222,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 6053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917965929.042, "dur": 51.317, + "args": { + "External id": 247223,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 6054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917966000.338, "dur": 19.256, + "args": { + "External id": 247224,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 6055 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 4183438, "tid": 4183438, + "ts": 667917966177.695, "dur": 97.884, + "args": { + "External id": 247225,"Record function id": 0, "Ev Idx": 6056 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917966351.665, "dur": 49.396, + "args": { + "External id": 247226,"Record function id": 0, "Ev Idx": 6057 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 4183438, "tid": 4183438, + "ts": 667917966409.783, "dur": 8462.445, + "args": { + "External id": 247227,"Record function id": 0, "Ev Idx": 6058 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 4183438, "tid": 4183438, + "ts": 667917966417.544, "dur": 1008.229, + "args": { + "External id": 247228,"Record function id": 0, "Ev Idx": 6059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917966502.630, "dur": 9.649, + "args": { + "External id": 247229,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917966526.056, "dur": 38.358, + "args": { + "External id": 247230,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 6061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966531.807, "dur": 2.100, + "args": { + "External id": 247231,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966538.880, "dur": 0.797, + "args": { + "External id": 247232,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966540.981, "dur": 0.630, + "args": { + "External id": 247233,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966543.409, "dur": 0.475, + "args": { + "External id": 247234,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966546.530, "dur": 0.643, + "args": { + "External id": 247235,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966548.597, "dur": 0.611, + "args": { + "External id": 247236,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966550.511, "dur": 2.926, + "args": { + "External id": 247237,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966554.996, "dur": 0.860, + "args": { + "External id": 247238,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966557.206, "dur": 0.630, + "args": { + "External id": 247239,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917966578.462, "dur": 45.414, + "args": { + "External id": 247240,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 6071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917966697.307, "dur": 123.286, + "args": { + "External id": 247241,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 6072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917966709.866, "dur": 7.548, + "args": { + "External id": 247242,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917966723.374, "dur": 11.704, + "args": { + "External id": 247243,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 6074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917966728.003, "dur": 6.650, + "args": { + "External id": 247244,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 6075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966731.992, "dur": 1.062, + "args": { + "External id": 247245,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 6076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917966742.332, "dur": 29.756, + "args": { + "External id": 247246,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 6077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966744.431, "dur": 0.652, + "args": { + "External id": 247247,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966746.687, "dur": 2.758, + "args": { + "External id": 247248,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966750.724, "dur": 0.585, + "args": { + "External id": 247249,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966752.729, "dur": 1.063, + "args": { + "External id": 247250,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966757.050, "dur": 0.580, + "args": { + "External id": 247251,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966758.686, "dur": 0.567, + "args": { + "External id": 247252,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966760.525, "dur": 0.411, + "args": { + "External id": 247253,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966764.598, "dur": 0.761, + "args": { + "External id": 247254,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917966766.793, "dur": 0.370, + "args": { + "External id": 247255,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917966784.123, "dur": 28.144, + "args": { + "External id": 247256,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 6087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917966876.083, "dur": 448.879, + "args": { + "External id": 247257,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 6088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917966904.132, "dur": 415.250, + "args": { + "External id": 247258,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6089, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917966914.826, "dur": 398.473, + "args": { + "External id": 247259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 6090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917967349.130, "dur": 2.409, + "args": { + "External id": 247260,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6091, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 4183438, "tid": 4183438, + "ts": 667917967447.984, "dur": 7201.812, + "args": { + "External id": 247261,"Record function id": 0, "Ev Idx": 6092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917967553.022, "dur": 7.287, + "args": { + "External id": 247262,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 6093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917967563.573, "dur": 1.243, + "args": { + "External id": 247263,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 6094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917967566.454, "dur": 1.028, + "args": { + "External id": 247264,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917967569.211, "dur": 1.109, + "args": { + "External id": 247265,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917967571.908, "dur": 1.122, + "args": { + "External id": 247266,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917967575.094, "dur": 1.109, + "args": { + "External id": 247267,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917967578.080, "dur": 1.405, + "args": { + "External id": 247268,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 6099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917967581.255, "dur": 2.820, + "args": { + "External id": 247269,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 6100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917967586.248, "dur": 0.862, + "args": { + "External id": 247270,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 6101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917967588.721, "dur": 1.220, + "args": { + "External id": 247271,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 6102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917967608.033, "dur": 7003.676, + "args": { + "External id": 247272,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 6103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917967623.944, "dur": 6980.866, + "args": { + "External id": 247273,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 6104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917967639.138, "dur": 51.132, + "args": { + "External id": 247274,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917967694.402, "dur": 6876.974, + "args": { + "External id": 247275,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 6106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917967696.904, "dur": 6873.975, + "args": { + "External id": 247276,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 6107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917967703.574, "dur": 8.639, + "args": { + "External id": 247277,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917967713.934, "dur": 6854.134, + "args": { + "External id": 247278,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 6109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917974816.194, "dur": 30.243, + "args": { + "External id": 247279,"Sequence number": 2987565, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 6110 + } + }, + { + "ph": "s", "id": 179, "pid": 4183438, "tid": 4183438, "ts": 667917974816.194, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917974833.019, "dur": 8.318, + "args": { + "External id": 247280,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 6111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917974836.168, "dur": 4.825, + "args": { + "External id": 247281,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917974911.106, "dur": 91.675, + "args": { + "External id": 247282,"Record function id": 0, "Ev Idx": 6113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917975004.410, "dur": 1145.463, + "args": { + "External id": 247283,"Record function id": 0, "Ev Idx": 6114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917975045.635, "dur": 1089.548, + "args": { + "External id": 247284,"Sequence number": 2987566, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 6115 + } + }, + { + "ph": "s", "id": 178, "pid": 4183438, "tid": 4183438, "ts": 667917975045.635, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917975113.850, "dur": 44.112, + "args": { + "External id": 247285,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917975171.692, "dur": 112.366, + "args": { + "External id": 247286,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917975296.840, "dur": 36.968, + "args": { + "External id": 247287,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917975343.279, "dur": 28.541, + "args": { + "External id": 247288,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917975402.056, "dur": 30.328, + "args": { + "External id": 247289,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917975448.124, "dur": 15.355, + "args": { + "External id": 247290,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917975481.301, "dur": 133.304, + "args": { + "External id": 247291,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917975533.522, "dur": 11.526, + "args": { + "External id": 247292,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917975538.318, "dur": 5.908, + "args": { + "External id": 247293,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917975547.868, "dur": 6.223, + "args": { + "External id": 247294,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917975555.437, "dur": 1.698, + "args": { + "External id": 247295,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917975559.438, "dur": 4.704, + "args": { + "External id": 247296,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917975688.263, "dur": 60.719, + "args": { + "External id": 247297,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917975787.496, "dur": 31.071, + "args": { + "External id": 247298,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917975827.624, "dur": 47.571, + "args": { + "External id": 247299,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 6130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917975884.455, "dur": 47.824, + "args": { + "External id": 247300,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 6131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917975955.145, "dur": 25.368, + "args": { + "External id": 247301,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 6132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917975986.493, "dur": 39.145, + "args": { + "External id": 247302,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 6133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917976044.932, "dur": 19.669, + "args": { + "External id": 247303,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 6134 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 4183438, "tid": 4183438, + "ts": 667917976238.332, "dur": 83.090, + "args": { + "External id": 247304,"Record function id": 0, "Ev Idx": 6135 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917976396.662, "dur": 48.430, + "args": { + "External id": 247305,"Record function id": 0, "Ev Idx": 6136 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 4183438, "tid": 4183438, + "ts": 667917976454.660, "dur": 8770.600, + "args": { + "External id": 247306,"Record function id": 0, "Ev Idx": 6137 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 4183438, "tid": 4183438, + "ts": 667917976464.264, "dur": 947.793, + "args": { + "External id": 247307,"Record function id": 0, "Ev Idx": 6138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917976549.167, "dur": 9.544, + "args": { + "External id": 247308,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917976572.451, "dur": 39.784, + "args": { + "External id": 247309,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 6140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976578.347, "dur": 2.518, + "args": { + "External id": 247310,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976585.727, "dur": 0.567, + "args": { + "External id": 247311,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976587.391, "dur": 0.705, + "args": { + "External id": 247312,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976590.143, "dur": 0.959, + "args": { + "External id": 247313,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976594.668, "dur": 0.504, + "args": { + "External id": 247314,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976596.806, "dur": 0.716, + "args": { + "External id": 247315,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976598.765, "dur": 2.404, + "args": { + "External id": 247316,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976602.996, "dur": 0.470, + "args": { + "External id": 247317,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976604.380, "dur": 0.659, + "args": { + "External id": 247318,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917976623.621, "dur": 81.345, + "args": { + "External id": 247319,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 6150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917976743.211, "dur": 121.089, + "args": { + "External id": 247320,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 6151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917976755.078, "dur": 6.378, + "args": { + "External id": 247321,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917976767.007, "dur": 11.203, + "args": { + "External id": 247322,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 6153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917976771.867, "dur": 5.903, + "args": { + "External id": 247323,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 6154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976775.230, "dur": 0.978, + "args": { + "External id": 247324,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 6155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917976785.492, "dur": 32.319, + "args": { + "External id": 247325,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 6156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976788.044, "dur": 3.016, + "args": { + "External id": 247326,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976792.424, "dur": 0.785, + "args": { + "External id": 247327,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976794.508, "dur": 0.845, + "args": { + "External id": 247328,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976799.640, "dur": 0.319, + "args": { + "External id": 247329,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976801.182, "dur": 0.675, + "args": { + "External id": 247330,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976803.596, "dur": 0.515, + "args": { + "External id": 247331,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976807.020, "dur": 0.780, + "args": { + "External id": 247332,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976809.050, "dur": 0.910, + "args": { + "External id": 247333,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917976810.968, "dur": 2.687, + "args": { + "External id": 247334,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917976831.248, "dur": 24.558, + "args": { + "External id": 247335,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 6166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917976919.181, "dur": 397.021, + "args": { + "External id": 247336,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 6167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917976947.030, "dur": 363.697, + "args": { + "External id": 247337,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6168, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917976957.672, "dur": 347.573, + "args": { + "External id": 247338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 6169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917977337.575, "dur": 2.589, + "args": { + "External id": 247339,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6170, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 4183438, "tid": 4183438, + "ts": 667917977433.445, "dur": 7584.095, + "args": { + "External id": 247340,"Record function id": 0, "Ev Idx": 6171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917977531.849, "dur": 6.807, + "args": { + "External id": 247341,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 6172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917977541.822, "dur": 1.367, + "args": { + "External id": 247342,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 6173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917977544.926, "dur": 1.152, + "args": { + "External id": 247343,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917977547.615, "dur": 1.501, + "args": { + "External id": 247344,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917977551.016, "dur": 1.449, + "args": { + "External id": 247345,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917977554.084, "dur": 1.141, + "args": { + "External id": 247346,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917977557.193, "dur": 0.949, + "args": { + "External id": 247347,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 6178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917977560.211, "dur": 2.458, + "args": { + "External id": 247348,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 6179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917977564.758, "dur": 1.080, + "args": { + "External id": 247349,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 6180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917977567.627, "dur": 1.154, + "args": { + "External id": 247350,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 6181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917977587.130, "dur": 7390.822, + "args": { + "External id": 247351,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 6182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917977603.444, "dur": 7367.978, + "args": { + "External id": 247352,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 6183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917977619.694, "dur": 15.190, + "args": { + "External id": 247353,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917977637.979, "dur": 7300.811, + "args": { + "External id": 247354,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 6185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917977640.648, "dur": 7297.613, + "args": { + "External id": 247355,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 6186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917977646.968, "dur": 45.141, + "args": { + "External id": 247356,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917977694.993, "dur": 7239.747, + "args": { + "External id": 247357,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 6188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917985156.363, "dur": 29.740, + "args": { + "External id": 247358,"Sequence number": 2987567, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 6189 + } + }, + { + "ph": "s", "id": 177, "pid": 4183438, "tid": 4183438, "ts": 667917985156.363, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917985173.043, "dur": 8.225, + "args": { + "External id": 247359,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 6190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917985176.419, "dur": 4.609, + "args": { + "External id": 247360,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917985268.235, "dur": 91.048, + "args": { + "External id": 247361,"Record function id": 0, "Ev Idx": 6192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917985360.896, "dur": 1095.232, + "args": { + "External id": 247362,"Record function id": 0, "Ev Idx": 6193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917985402.635, "dur": 1039.546, + "args": { + "External id": 247363,"Sequence number": 2987568, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 6194 + } + }, + { + "ph": "s", "id": 176, "pid": 4183438, "tid": 4183438, "ts": 667917985402.635, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917985471.313, "dur": 44.608, + "args": { + "External id": 247364,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917985529.610, "dur": 95.956, + "args": { + "External id": 247365,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917985634.188, "dur": 76.066, + "args": { + "External id": 247366,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917985723.366, "dur": 32.950, + "args": { + "External id": 247367,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917985782.109, "dur": 26.490, + "args": { + "External id": 247368,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917985826.415, "dur": 15.426, + "args": { + "External id": 247369,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917985859.739, "dur": 132.640, + "args": { + "External id": 247370,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917985912.294, "dur": 12.527, + "args": { + "External id": 247371,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917985917.621, "dur": 6.448, + "args": { + "External id": 247372,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917985927.663, "dur": 6.564, + "args": { + "External id": 247373,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917985935.639, "dur": 1.668, + "args": { + "External id": 247374,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917985939.625, "dur": 2.714, + "args": { + "External id": 247375,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917986004.196, "dur": 46.880, + "args": { + "External id": 247376,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917986080.607, "dur": 25.605, + "args": { + "External id": 247377,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917986114.212, "dur": 38.765, + "args": { + "External id": 247378,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 6209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917986161.997, "dur": 47.901, + "args": { + "External id": 247379,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 6210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917986234.635, "dur": 29.980, + "args": { + "External id": 247380,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 6211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917986271.079, "dur": 55.996, + "args": { + "External id": 247381,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 6212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917986347.693, "dur": 19.304, + "args": { + "External id": 247382,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 6213 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 4183438, "tid": 4183438, + "ts": 667917986524.253, "dur": 78.759, + "args": { + "External id": 247383,"Record function id": 0, "Ev Idx": 6214 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917986716.260, "dur": 51.466, + "args": { + "External id": 247384,"Record function id": 0, "Ev Idx": 6215 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 4183438, "tid": 4183438, + "ts": 667917986777.991, "dur": 8625.849, + "args": { + "External id": 247385,"Record function id": 0, "Ev Idx": 6216 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 4183438, "tid": 4183438, + "ts": 667917986787.483, "dur": 927.640, + "args": { + "External id": 247386,"Record function id": 0, "Ev Idx": 6217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917986871.875, "dur": 10.617, + "args": { + "External id": 247387,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917986896.826, "dur": 37.388, + "args": { + "External id": 247388,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 6219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917986902.197, "dur": 2.677, + "args": { + "External id": 247389,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917986909.481, "dur": 0.524, + "args": { + "External id": 247390,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917986911.177, "dur": 0.604, + "args": { + "External id": 247391,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917986913.115, "dur": 0.726, + "args": { + "External id": 247392,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917986916.663, "dur": 0.613, + "args": { + "External id": 247393,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917986918.609, "dur": 0.776, + "args": { + "External id": 247394,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917986920.523, "dur": 3.379, + "args": { + "External id": 247395,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917986925.077, "dur": 0.731, + "args": { + "External id": 247396,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917986927.045, "dur": 0.690, + "args": { + "External id": 247397,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917986946.183, "dur": 40.952, + "args": { + "External id": 247398,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 6229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917987021.958, "dur": 110.768, + "args": { + "External id": 247399,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 6230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917987035.758, "dur": 5.875, + "args": { + "External id": 247400,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917987047.053, "dur": 10.698, + "args": { + "External id": 247401,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 6232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917987051.539, "dur": 5.789, + "args": { + "External id": 247402,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 6233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917987054.881, "dur": 1.107, + "args": { + "External id": 247403,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 6234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917987064.402, "dur": 27.669, + "args": { + "External id": 247404,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 6235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917987066.648, "dur": 0.767, + "args": { + "External id": 247405,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917987068.640, "dur": 2.980, + "args": { + "External id": 247406,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917987072.720, "dur": 0.634, + "args": { + "External id": 247407,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917987074.366, "dur": 0.656, + "args": { + "External id": 247408,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917987078.492, "dur": 0.421, + "args": { + "External id": 247409,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917987079.689, "dur": 0.772, + "args": { + "External id": 247410,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917987081.425, "dur": 0.506, + "args": { + "External id": 247411,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917987084.978, "dur": 0.701, + "args": { + "External id": 247412,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917987086.872, "dur": 0.412, + "args": { + "External id": 247413,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917987102.566, "dur": 21.897, + "args": { + "External id": 247414,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 6245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917987184.154, "dur": 398.278, + "args": { + "External id": 247415,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 6246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917987226.745, "dur": 350.177, + "args": { + "External id": 247416,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6247, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917987238.672, "dur": 330.894, + "args": { + "External id": 247417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 6248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917987603.184, "dur": 2.213, + "args": { + "External id": 247418,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6249, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 4183438, "tid": 4183438, + "ts": 667917987740.480, "dur": 7450.653, + "args": { + "External id": 247419,"Record function id": 0, "Ev Idx": 6250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917987851.557, "dur": 7.317, + "args": { + "External id": 247420,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 6251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917987862.349, "dur": 1.466, + "args": { + "External id": 247421,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 6252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917987865.426, "dur": 1.060, + "args": { + "External id": 247422,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917987868.163, "dur": 1.208, + "args": { + "External id": 247423,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917987870.884, "dur": 1.157, + "args": { + "External id": 247424,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917987873.785, "dur": 1.334, + "args": { + "External id": 247425,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917987876.928, "dur": 1.431, + "args": { + "External id": 247426,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 6257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917987880.236, "dur": 2.493, + "args": { + "External id": 247427,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 6258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917987884.262, "dur": 1.157, + "args": { + "External id": 247428,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 6259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917987887.664, "dur": 1.015, + "args": { + "External id": 247429,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 6260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917987907.721, "dur": 7244.988, + "args": { + "External id": 247430,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 6261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917987923.410, "dur": 7222.693, + "args": { + "External id": 247431,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 6262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917987939.505, "dur": 14.568, + "args": { + "External id": 247432,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917987956.906, "dur": 7152.452, + "args": { + "External id": 247433,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 6264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917987959.411, "dur": 7149.462, + "args": { + "External id": 247434,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 6265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917987965.668, "dur": 5.522, + "args": { + "External id": 247435,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917987973.204, "dur": 7132.323, + "args": { + "External id": 247436,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 6267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917995348.074, "dur": 29.394, + "args": { + "External id": 247437,"Sequence number": 2987569, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 6268 + } + }, + { + "ph": "s", "id": 175, "pid": 4183438, "tid": 4183438, "ts": 667917995348.074, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667917995364.278, "dur": 8.020, + "args": { + "External id": 247438,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 6269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917995367.423, "dur": 4.558, + "args": { + "External id": 247439,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667917995444.195, "dur": 88.053, + "args": { + "External id": 247440,"Record function id": 0, "Ev Idx": 6271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667917995534.377, "dur": 1079.044, + "args": { + "External id": 247441,"Record function id": 0, "Ev Idx": 6272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667917995574.926, "dur": 1023.180, + "args": { + "External id": 247442,"Sequence number": 2987570, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 6273 + } + }, + { + "ph": "s", "id": 174, "pid": 4183438, "tid": 4183438, "ts": 667917995574.926, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667917995642.710, "dur": 79.801, + "args": { + "External id": 247443,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917995742.684, "dur": 95.966, + "args": { + "External id": 247444,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917995848.478, "dur": 35.581, + "args": { + "External id": 247445,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917995890.949, "dur": 31.174, + "args": { + "External id": 247446,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917995949.916, "dur": 27.491, + "args": { + "External id": 247447,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667917995992.668, "dur": 17.226, + "args": { + "External id": 247448,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667917996025.183, "dur": 132.232, + "args": { + "External id": 247449,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667917996078.081, "dur": 11.632, + "args": { + "External id": 247450,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917996082.959, "dur": 5.991, + "args": { + "External id": 247451,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917996092.496, "dur": 4.733, + "args": { + "External id": 247452,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917996098.471, "dur": 3.679, + "args": { + "External id": 247453,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917996104.767, "dur": 2.910, + "args": { + "External id": 247454,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917996169.370, "dur": 61.865, + "args": { + "External id": 247455,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667917996265.821, "dur": 30.410, + "args": { + "External id": 247456,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917996304.759, "dur": 41.863, + "args": { + "External id": 247457,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 6288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917996354.151, "dur": 31.828, + "args": { + "External id": 247458,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 6289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667917996409.885, "dur": 27.422, + "args": { + "External id": 247459,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 6290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667917996443.180, "dur": 45.380, + "args": { + "External id": 247460,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 6291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667917996506.542, "dur": 21.786, + "args": { + "External id": 247461,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 6292 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 4183438, "tid": 4183438, + "ts": 667917996729.926, "dur": 78.443, + "args": { + "External id": 247462,"Record function id": 0, "Ev Idx": 6293 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 4183438, "tid": 4183438, + "ts": 667917996885.341, "dur": 49.044, + "args": { + "External id": 247463,"Record function id": 0, "Ev Idx": 6294 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 4183438, "tid": 4183438, + "ts": 667917996943.899, "dur": 8554.584, + "args": { + "External id": 247464,"Record function id": 0, "Ev Idx": 6295 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 4183438, "tid": 4183438, + "ts": 667917996952.050, "dur": 934.972, + "args": { + "External id": 247465,"Record function id": 0, "Ev Idx": 6296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917997033.663, "dur": 9.981, + "args": { + "External id": 247466,"Record function id": 0, "Concrete Inputs": ["[1605888]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917997057.404, "dur": 37.437, + "args": { + "External id": 247467,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 6298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997063.301, "dur": 2.305, + "args": { + "External id": 247468,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997070.648, "dur": 0.298, + "args": { + "External id": 247469,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "128"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997072.138, "dur": 0.851, + "args": { + "External id": 247470,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "131200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997074.611, "dur": 0.786, + "args": { + "External id": 247471,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "262272"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997078.109, "dur": 0.668, + "args": { + "External id": 247472,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "393344"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997079.955, "dur": 0.626, + "args": { + "External id": 247473,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "524416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997081.355, "dur": 3.142, + "args": { + "External id": 247474,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997086.170, "dur": 0.524, + "args": { + "External id": 247475,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "884992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997087.617, "dur": 0.832, + "args": { + "External id": 247476,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "1245440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917997105.886, "dur": 45.427, + "args": { + "External id": 247477,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 6308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 4183438, "tid": 4183438, + "ts": 667917997185.810, "dur": 138.549, + "args": { + "External id": 247478,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1605888", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [], [], [], [], [], []], "Ev Idx": 6309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917997214.256, "dur": 6.089, + "args": { + "External id": 247479,"Record function id": 0, "Concrete Inputs": ["[12847104]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 4183438, "tid": 4183438, + "ts": 667917997226.111, "dur": 11.611, + "args": { + "External id": 247480,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "1605888"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 6311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667917997230.797, "dur": 6.487, + "args": { + "External id": 247481,"Record function id": 0, "Concrete Inputs": ["", "0", "3211776", "4817664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[12847104], [], [], [], []], "Ev Idx": 6312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997234.684, "dur": 0.954, + "args": { + "External id": 247482,"Record function id": 0, "Concrete Inputs": ["", "[1605888]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[12847104], [], [], []], "Ev Idx": 6313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 4183438, "tid": 4183438, + "ts": 667917997245.108, "dur": 31.132, + "args": { + "External id": 247483,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1605888], [], []], "Ev Idx": 6314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997247.371, "dur": 3.186, + "args": { + "External id": 247484,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3211776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997252.230, "dur": 1.226, + "args": { + "External id": 247485,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3211904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997255.057, "dur": 0.777, + "args": { + "External id": 247486,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3342976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997258.769, "dur": 0.516, + "args": { + "External id": 247487,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997260.326, "dur": 0.771, + "args": { + "External id": 247488,"Record function id": 0, "Concrete Inputs": ["", "[131072]", "[1]", "3605120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997262.276, "dur": 0.681, + "args": { + "External id": 247489,"Record function id": 0, "Concrete Inputs": ["", "[128]", "[1]", "3736192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997265.722, "dur": 0.479, + "args": { + "External id": 247490,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "3736320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997267.496, "dur": 0.832, + "args": { + "External id": 247491,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4096768"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917997269.104, "dur": 2.543, + "args": { + "External id": 247492,"Record function id": 0, "Concrete Inputs": ["", "[360448]", "[1]", "4457216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1605888], [], [], []], "Ev Idx": 6323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917997288.778, "dur": 26.782, + "args": { + "External id": 247493,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], [[128], [131072], [131072], [131072], [131072], [128], [360448], [360448], [360448]], []], "Ev Idx": 6324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 4183438, "tid": 4183438, + "ts": 667917997378.599, "dur": 410.568, + "args": { + "External id": 247494,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[12847104], [1605888], [], [], []], "Ev Idx": 6325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917997406.303, "dur": 377.699, + "args": { + "External id": 247495,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 12847104, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1605888], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6326, "In msg nelems": 1605888 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 4183438, "tid": 4183438, + "ts": 667917997416.717, "dur": 361.371, + "args": { + "External id": 247496,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[1605888]], "Ev Idx": 6327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667917997811.823, "dur": 2.377, + "args": { + "External id": 247497,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6328, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 4183438, "tid": 4183438, + "ts": 667917997908.652, "dur": 7392.885, + "args": { + "External id": 247498,"Record function id": 0, "Ev Idx": 6329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917998014.724, "dur": 6.808, + "args": { + "External id": 247499,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[12847104], []], "Ev Idx": 6330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917998033.909, "dur": 1.395, + "args": { + "External id": 247500,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 6331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917998037.157, "dur": 1.296, + "args": { + "External id": 247501,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917998040.251, "dur": 1.138, + "args": { + "External id": 247502,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917998042.954, "dur": 1.155, + "args": { + "External id": 247503,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917998045.389, "dur": 1.204, + "args": { + "External id": 247504,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1048576], []], "Ev Idx": 6335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917998048.332, "dur": 1.312, + "args": { + "External id": 247505,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1024], []], "Ev Idx": 6336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917998051.642, "dur": 2.145, + "args": { + "External id": 247506,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 6337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917998055.449, "dur": 0.747, + "args": { + "External id": 247507,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 6338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667917998057.950, "dur": 1.307, + "args": { + "External id": 247508,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2883584], []], "Ev Idx": 6339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917998080.128, "dur": 7181.161, + "args": { + "External id": 247509,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 6340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917998096.411, "dur": 7157.103, + "args": { + "External id": 247510,"Record function id": 0, "Concrete Inputs": ["", "[128, 131072, 131072, 131072, 131072, 128, 360448, 360448, 360448]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[1605888, 1], [], [], [[128, 1], [131072, 1], [131072, 1], [131072, 1], [131072, 1], [128, 1], [360448, 1], [360448, 1], [360448, 1]]], "Input Dims": [[8, 1605888], [], [], [[8, 128], [8, 131072], [8, 131072], [8, 131072], [8, 131072], [8, 128], [8, 360448], [8, 360448], [8, 360448]]], "Ev Idx": 6341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667917998109.711, "dur": 14.572, + "args": { + "External id": 247511,"Record function id": 0, "Concrete Inputs": ["[823]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667917998126.759, "dur": 7038.425, + "args": { + "External id": 247512,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], [], []], "Ev Idx": 6343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667917998129.492, "dur": 7035.172, + "args": { + "External id": 247513,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[823], [], [], [], [], [], []], "Ev Idx": 6344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667917998135.872, "dur": 5.718, + "args": { + "External id": 247514,"Record function id": 0, "Concrete Inputs": ["[823]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667917998143.295, "dur": 7018.235, + "args": { + "External id": 247515,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[823], [823], []], "Ev Idx": 6346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918005439.280, "dur": 35.163, + "args": { + "External id": 247516,"Sequence number": 2987571, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024]], "Ev Idx": 6347 + } + }, + { + "ph": "s", "id": 173, "pid": 4183438, "tid": 4183438, "ts": 667918005439.280, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667918005460.980, "dur": 8.107, + "args": { + "External id": 247517,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1]], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024]], "Ev Idx": 6348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918005464.202, "dur": 4.639, + "args": { + "External id": 247518,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667918005539.709, "dur": 90.024, + "args": { + "External id": 247519,"Record function id": 0, "Ev Idx": 6350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 4183438, "tid": 4183438, + "ts": 667918005631.466, "dur": 1142.124, + "args": { + "External id": 247520,"Record function id": 0, "Ev Idx": 6351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918005716.018, "dur": 1040.786, + "args": { + "External id": 247521,"Sequence number": 2987572, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1], [1024, 1], [1024, 1], [1024, 1], [32, 1], [32, 1], [1024, 1], [1], [1024, 1], [1024, 1], [2816, 1]], "Input Dims": [[16, 4096, 1024], [1024], [1024, 1024], [1024, 1024], [1024, 1024], [8192, 32], [8192, 32], [1024, 1024], [1024], [2816, 1024], [2816, 1024], [1024, 2816]], "Ev Idx": 6352 + } + }, + { + "ph": "s", "id": 172, "pid": 4183438, "tid": 4183438, "ts": 667918005716.018, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667918005787.776, "dur": 48.076, + "args": { + "External id": 247522,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918005851.830, "dur": 94.601, + "args": { + "External id": 247523,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918005955.236, "dur": 35.746, + "args": { + "External id": 247524,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918006000.218, "dur": 30.595, + "args": { + "External id": 247525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667918006054.349, "dur": 25.600, + "args": { + "External id": 247526,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 4183438, "tid": 4183438, + "ts": 667918006097.721, "dur": 14.189, + "args": { + "External id": 247527,"kernel_hash": "cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "16", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/v3/cv3g2wsdzeyx5boypyj27xaitsqinaivjy4szifyb3uqvdwkozw5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667918006129.087, "dur": 149.911, + "args": { + "External id": 247528,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918006180.598, "dur": 27.170, + "args": { + "External id": 247529,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918006185.765, "dur": 20.682, + "args": { + "External id": 247530,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918006211.289, "dur": 4.844, + "args": { + "External id": 247531,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918006217.640, "dur": 1.459, + "args": { + "External id": 247532,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918006221.645, "dur": 3.721, + "args": { + "External id": 247533,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918006289.522, "dur": 50.759, + "args": { + "External id": 247534,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [1024, 1]], "Input Dims": [[65536, 1024], [1024, 1024], [65536, 1024]], "Ev Idx": 6365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 4183438, "tid": 4183438, + "ts": 667918006372.935, "dur": 29.562, + "args": { + "External id": 247535,"kernel_hash": "cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/tt/cttakkz2bnieukw67monti4duhgj3fsbcaibwt2mjgnnrdznnjm2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1024, 1], [1024, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536, 1024], [1024], [65536, 1024], [65536, 1024], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918006411.186, "dur": 39.879, + "args": { + "External id": 247536,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 6367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918006460.223, "dur": 32.361, + "args": { + "External id": 247537,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024], [2816, 1]], "Input Dims": [[65536, 1024], [1024, 2816], [65536, 2816]], "Ev Idx": 6368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 4183438, "tid": 4183438, + "ts": 667918006514.745, "dur": 28.468, + "args": { + "External id": 247538,"kernel_hash": "clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp", "grid": "grid(184549376,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "184549376"], "kernel_file": "/tmp/torchinductor_cvm/lz/clzix6hvzjx55eyxbe3uw2ngyqaq3mu6uwbzhejcgoxnzlj4utbp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2816, 1], [2816, 1], [11534336, 2816, 1], []], "Input Dims": [[65536, 2816], [65536, 2816], [16, 4096, 2816], []], "Ev Idx": 6369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918006549.355, "dur": 46.299, + "args": { + "External id": 247539,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816], [1024, 1]], "Input Dims": [[65536, 2816], [2816, 1024], [65536, 1024]], "Ev Idx": 6370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 4183438, "tid": 4183438, + "ts": 667918006615.419, "dur": 19.313, + "args": { + "External id": 247540,"kernel_hash": "cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds", "grid": "grid(67108864,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "67108864"], "kernel_file": "/tmp/torchinductor_cvm/im/cimycee4ybs4iko3ux3z53wborga7phhsjwuofkutdacu75wrlds.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [65536, 1024], []], "Ev Idx": 6371 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 4183438, "tid": 4183438, + "ts": 667918006843.562, "dur": 36.759, + "args": { + "External id": 247541,"Record function id": 0, "Ev Idx": 6372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918006965.016, "dur": 307.316, + "args": { + "External id": 247542,"Sequence number": 2987573, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1], [], [], [], [], [], []], "Input Dims": [[16, 4096, 1024], [1024], [], [], [], [], [], []], "Ev Idx": 6373 + } + }, + { + "ph": "s", "id": 171, "pid": 4183438, "tid": 4183438, "ts": 667918006965.016, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918006998.119, "dur": 8.201, + "args": { + "External id": 247543,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918006999.865, "dur": 6.209, + "args": { + "External id": 247544,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918007016.772, "dur": 13.399, + "args": { + "External id": 247545,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 6376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918007019.767, "dur": 9.872, + "args": { + "External id": 247546,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918007037.862, "dur": 4.957, + "args": { + "External id": 247547,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918007247.981, "dur": 9.263, + "args": { + "External id": 247548,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918007252.424, "dur": 4.447, + "args": { + "External id": 247549,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918007300.614, "dur": 135.458, + "args": { + "External id": 247550,"Sequence number": 2987574, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [1024, 1024], []], "Ev Idx": 6381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918007303.907, "dur": 13.481, + "args": { + "External id": 247551,"Sequence number": 2987574, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 6382 + } + }, + { + "ph": "s", "id": 170, "pid": 4183438, "tid": 4183438, "ts": 667918007303.907, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918007309.079, "dur": 6.563, + "args": { + "External id": 247552,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 6383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918007312.956, "dur": 2.351, + "args": { + "External id": 247553,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 6384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918007320.647, "dur": 115.089, + "args": { + "External id": 247554,"Sequence number": 2987575, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 1024]], "Ev Idx": 6385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918007323.724, "dur": 5.598, + "args": { + "External id": 247555,"Sequence number": 2987575, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918007325.028, "dur": 4.155, + "args": { + "External id": 247556,"Sequence number": 2987575, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6387 + } + }, + { + "ph": "s", "id": 169, "pid": 4183438, "tid": 4183438, "ts": 667918007325.028, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918007331.447, "dur": 93.569, + "args": { + "External id": 247557,"Sequence number": 2987576, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 6388 + } + }, + { + "ph": "s", "id": 168, "pid": 4183438, "tid": 4183438, "ts": 667918007331.447, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918007428.387, "dur": 6.151, + "args": { + "External id": 247558,"Sequence number": 2987577, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6389 + } + }, + { + "ph": "s", "id": 167, "pid": 4183438, "tid": 4183438, "ts": 667918007428.387, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918007446.386, "dur": 66.659, + "args": { + "External id": 247559,"Sequence number": 2987578, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [1024, 1024], []], "Ev Idx": 6390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918007447.461, "dur": 6.721, + "args": { + "External id": 247560,"Sequence number": 2987578, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 6391 + } + }, + { + "ph": "s", "id": 166, "pid": 4183438, "tid": 4183438, "ts": 667918007447.461, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918007449.790, "dur": 3.213, + "args": { + "External id": 247561,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 6392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918007451.702, "dur": 1.085, + "args": { + "External id": 247562,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 6393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918007455.024, "dur": 57.730, + "args": { + "External id": 247563,"Sequence number": 2987579, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 1024]], "Ev Idx": 6394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918007456.467, "dur": 7.100, + "args": { + "External id": 247564,"Sequence number": 2987579, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918007458.409, "dur": 4.983, + "args": { + "External id": 247565,"Sequence number": 2987579, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6396 + } + }, + { + "ph": "s", "id": 165, "pid": 4183438, "tid": 4183438, "ts": 667918007458.409, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918007464.518, "dur": 41.076, + "args": { + "External id": 247566,"Sequence number": 2987580, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 6397 + } + }, + { + "ph": "s", "id": 164, "pid": 4183438, "tid": 4183438, "ts": 667918007464.518, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918007507.434, "dur": 4.881, + "args": { + "External id": 247567,"Sequence number": 2987581, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6398 + } + }, + { + "ph": "s", "id": 163, "pid": 4183438, "tid": 4183438, "ts": 667918007507.434, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918007521.376, "dur": 61.810, + "args": { + "External id": 247568,"Sequence number": 2987582, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [1024, 1024], []], "Ev Idx": 6399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918007522.206, "dur": 7.000, + "args": { + "External id": 247569,"Sequence number": 2987582, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 6400 + } + }, + { + "ph": "s", "id": 162, "pid": 4183438, "tid": 4183438, "ts": 667918007522.206, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918007524.766, "dur": 3.173, + "args": { + "External id": 247570,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 6401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918007526.810, "dur": 0.977, + "args": { + "External id": 247571,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 6402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918007530.715, "dur": 52.263, + "args": { + "External id": 247572,"Sequence number": 2987583, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 1024]], "Ev Idx": 6403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918007531.989, "dur": 5.430, + "args": { + "External id": 247573,"Sequence number": 2987583, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918007533.192, "dur": 4.069, + "args": { + "External id": 247574,"Sequence number": 2987583, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6405 + } + }, + { + "ph": "s", "id": 161, "pid": 4183438, "tid": 4183438, "ts": 667918007533.192, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918007538.215, "dur": 38.162, + "args": { + "External id": 247575,"Sequence number": 2987584, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 6406 + } + }, + { + "ph": "s", "id": 160, "pid": 4183438, "tid": 4183438, "ts": 667918007538.215, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918007578.084, "dur": 4.451, + "args": { + "External id": 247576,"Sequence number": 2987585, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6407 + } + }, + { + "ph": "s", "id": 159, "pid": 4183438, "tid": 4183438, "ts": 667918007578.084, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918007604.668, "dur": 4.329, + "args": { + "External id": 247577,"Sequence number": 2987586, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918007605.912, "dur": 2.950, + "args": { + "External id": 247578,"Sequence number": 2987586, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6409 + } + }, + { + "ph": "s", "id": 158, "pid": 4183438, "tid": 4183438, "ts": 667918007605.912, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918007618.761, "dur": 5.470, + "args": { + "External id": 247579,"Sequence number": 2987587, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918007620.062, "dur": 4.013, + "args": { + "External id": 247580,"Sequence number": 2987587, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6411 + } + }, + { + "ph": "s", "id": 157, "pid": 4183438, "tid": 4183438, "ts": 667918007620.062, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918007628.523, "dur": 3.538, + "args": { + "External id": 247581,"Sequence number": 2987588, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918007629.764, "dur": 2.158, + "args": { + "External id": 247582,"Sequence number": 2987588, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6413 + } + }, + { + "ph": "s", "id": 156, "pid": 4183438, "tid": 4183438, "ts": 667918007629.764, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918007704.876, "dur": 189.808, + "args": { + "External id": 247583,"Sequence number": 2987589, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 6414 + } + }, + { + "ph": "s", "id": 155, "pid": 4183438, "tid": 4183438, "ts": 667918007704.876, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918007728.532, "dur": 10.820, + "args": { + "External id": 247584,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918007732.221, "dur": 6.495, + "args": { + "External id": 247585,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918007910.385, "dur": 112.135, + "args": { + "External id": 247586,"Sequence number": 2987590, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 6417 + } + }, + { + "ph": "s", "id": 154, "pid": 4183438, "tid": 4183438, "ts": 667918007910.385, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918007925.101, "dur": 8.578, + "args": { + "External id": 247587,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918007927.482, "dur": 5.783, + "args": { + "External id": 247588,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 4183438, "tid": 4183438, + "ts": 667918008054.445, "dur": 207.633, + "args": { + "External id": 247589,"Sequence number": 2987591, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], []], "Ev Idx": 6420 + } + }, + { + "ph": "s", "id": 153, "pid": 4183438, "tid": 4183438, "ts": 667918008054.445, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667918008084.211, "dur": 146.396, + "args": { + "External id": 247590,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918008133.273, "dur": 8.252, + "args": { + "External id": 247591,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918008135.797, "dur": 5.028, + "args": { + "External id": 247592,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918008144.505, "dur": 4.023, + "args": { + "External id": 247593,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918008150.050, "dur": 1.785, + "args": { + "External id": 247594,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918008154.642, "dur": 5.360, + "args": { + "External id": 247595,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 4183438, + "ts": 667918008245.307, "dur": 5.854, + "args": { + "External id": 247596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 6427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918008268.094, "dur": 6.849, + "args": { + "External id": 247597,"Sequence number": 2987592, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 6428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918008269.749, "dur": 5.078, + "args": { + "External id": 247598,"Sequence number": 2987592, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 6429 + } + }, + { + "ph": "s", "id": 152, "pid": 4183438, "tid": 4183438, "ts": 667918008269.749, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918008288.142, "dur": 114.056, + "args": { + "External id": 247599,"Sequence number": 2987593, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [1024, 1024], []], "Ev Idx": 6430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918008289.534, "dur": 9.833, + "args": { + "External id": 247600,"Sequence number": 2987593, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 6431 + } + }, + { + "ph": "s", "id": 151, "pid": 4183438, "tid": 4183438, "ts": 667918008289.534, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918008293.103, "dur": 5.125, + "args": { + "External id": 247601,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 6432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918008296.328, "dur": 1.554, + "args": { + "External id": 247602,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 6433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918008300.818, "dur": 101.054, + "args": { + "External id": 247603,"Sequence number": 2987594, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 1024]], "Ev Idx": 6434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918008303.442, "dur": 5.757, + "args": { + "External id": 247604,"Sequence number": 2987594, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918008304.708, "dur": 4.340, + "args": { + "External id": 247605,"Sequence number": 2987594, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6436 + } + }, + { + "ph": "s", "id": 150, "pid": 4183438, "tid": 4183438, "ts": 667918008304.708, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918008310.134, "dur": 84.222, + "args": { + "External id": 247606,"Sequence number": 2987595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 6437 + } + }, + { + "ph": "s", "id": 149, "pid": 4183438, "tid": 4183438, "ts": 667918008310.134, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918008397.126, "dur": 4.037, + "args": { + "External id": 247607,"Sequence number": 2987596, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6438 + } + }, + { + "ph": "s", "id": 148, "pid": 4183438, "tid": 4183438, "ts": 667918008397.126, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918008437.570, "dur": 268.745, + "args": { + "External id": 247608,"Sequence number": 2987597, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1], [], [4194304, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 1024], [1024], [], [16, 4096, 1024], [], [], [], []], "Ev Idx": 6439 + } + }, + { + "ph": "s", "id": 147, "pid": 4183438, "tid": 4183438, "ts": 667918008437.570, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918008456.122, "dur": 3.433, + "args": { + "External id": 247609,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918008457.495, "dur": 1.908, + "args": { + "External id": 247610,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 4183438, "tid": 4183438, + "ts": 667918008464.611, "dur": 4.251, + "args": { + "External id": 247611,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1024, 1]], "Input Dims": [[16, 4096, 1024], [65536, 1024]], "Ev Idx": 6442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918008466.197, "dur": 2.541, + "args": { + "External id": 247612,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918008467.380, "dur": 1.251, + "args": { + "External id": 247613,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918008477.184, "dur": 7.695, + "args": { + "External id": 247614,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 6445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918008479.736, "dur": 4.814, + "args": { + "External id": 247615,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918008491.596, "dur": 3.660, + "args": { + "External id": 247616,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918008498.896, "dur": 4.602, + "args": { + "External id": 247617,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918008641.891, "dur": 5.006, + "args": { + "External id": 247618,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918008643.160, "dur": 3.491, + "args": { + "External id": 247619,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918008649.664, "dur": 41.015, + "args": { + "External id": 247620,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918008688.265, "dur": 2.070, + "args": { + "External id": 247621,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918008731.211, "dur": 110.841, + "args": { + "External id": 247622,"Sequence number": 2987598, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [2816, 1024], []], "Ev Idx": 6453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918008732.480, "dur": 9.567, + "args": { + "External id": 247623,"Sequence number": 2987598, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 6454 + } + }, + { + "ph": "s", "id": 146, "pid": 4183438, "tid": 4183438, "ts": 667918008732.480, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918008736.212, "dur": 4.695, + "args": { + "External id": 247624,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 6455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918008738.875, "dur": 1.737, + "args": { + "External id": 247625,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 6456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918008743.066, "dur": 98.667, + "args": { + "External id": 247626,"Sequence number": 2987599, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 2816]], "Ev Idx": 6457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918008745.324, "dur": 6.935, + "args": { + "External id": 247627,"Sequence number": 2987599, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918008746.551, "dur": 5.570, + "args": { + "External id": 247628,"Sequence number": 2987599, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6459 + } + }, + { + "ph": "s", "id": 145, "pid": 4183438, "tid": 4183438, "ts": 667918008746.551, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918008753.298, "dur": 81.948, + "args": { + "External id": 247629,"Sequence number": 2987600, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 2816]], "Ev Idx": 6460 + } + }, + { + "ph": "s", "id": 144, "pid": 4183438, "tid": 4183438, "ts": 667918008753.298, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918008837.776, "dur": 3.292, + "args": { + "External id": 247630,"Sequence number": 2987601, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1], []], "Input Dims": [[65536, 2816], []], "Ev Idx": 6461 + } + }, + { + "ph": "s", "id": 143, "pid": 4183438, "tid": 4183438, "ts": 667918008837.776, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918008850.504, "dur": 70.030, + "args": { + "External id": 247631,"Sequence number": 2987602, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [2816, 1024], []], "Ev Idx": 6462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918008851.250, "dur": 9.357, + "args": { + "External id": 247632,"Sequence number": 2987602, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 6463 + } + }, + { + "ph": "s", "id": 142, "pid": 4183438, "tid": 4183438, "ts": 667918008851.250, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918008853.262, "dur": 6.102, + "args": { + "External id": 247633,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 6464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918008858.009, "dur": 1.170, + "args": { + "External id": 247634,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 6465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918008861.389, "dur": 58.941, + "args": { + "External id": 247635,"Sequence number": 2987603, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 2816]], "Ev Idx": 6466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918008863.058, "dur": 5.454, + "args": { + "External id": 247636,"Sequence number": 2987603, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918008864.448, "dur": 3.917, + "args": { + "External id": 247637,"Sequence number": 2987603, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6468 + } + }, + { + "ph": "s", "id": 141, "pid": 4183438, "tid": 4183438, "ts": 667918008864.448, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918008869.084, "dur": 44.994, + "args": { + "External id": 247638,"Sequence number": 2987604, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 2816]], "Ev Idx": 6469 + } + }, + { + "ph": "s", "id": 140, "pid": 4183438, "tid": 4183438, "ts": 667918008869.084, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918008916.039, "dur": 3.945, + "args": { + "External id": 247639,"Sequence number": 2987605, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1], []], "Input Dims": [[65536, 2816], []], "Ev Idx": 6470 + } + }, + { + "ph": "s", "id": 139, "pid": 4183438, "tid": 4183438, "ts": 667918008916.039, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918008944.966, "dur": 159.268, + "args": { + "External id": 247640,"Sequence number": 2987606, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[11534336, 2816, 1], [11534336, 2816, 1], [2816, 1], []], "Input Dims": [[16, 4096, 2816], [16, 4096, 2816], [1024, 2816], []], "Ev Idx": 6471 + } + }, + { + "ph": "s", "id": 138, "pid": 4183438, "tid": 4183438, "ts": 667918008944.966, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918008990.484, "dur": 4.361, + "args": { + "External id": 247641,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2816]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918009030.240, "dur": 59.487, + "args": { + "External id": 247642,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[11534336, 2816, 1], [2816, 1], []], "Input Dims": [[16, 4096, 2816], [1024, 2816], []], "Ev Idx": 6473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918009031.116, "dur": 5.017, + "args": { + "External id": 247643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 6474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918009032.227, "dur": 3.012, + "args": { + "External id": 247644,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2816, 1], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 6475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918009033.972, "dur": 1.053, + "args": { + "External id": 247645,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 6476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918009037.056, "dur": 52.297, + "args": { + "External id": 247646,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[11534336, 2816, 1], [1, 2816]], "Input Dims": [[16, 4096, 2816], [2816, 1024]], "Ev Idx": 6477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918009038.893, "dur": 3.108, + "args": { + "External id": 247647,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 6478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918009040.295, "dur": 1.518, + "args": { + "External id": 247648,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 6479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918009042.747, "dur": 42.580, + "args": { + "External id": 247649,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816]], "Input Dims": [[65536, 2816], [2816, 1024]], "Ev Idx": 6480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918009087.217, "dur": 1.490, + "args": { + "External id": 247650,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 4183438, + "ts": 667918009112.951, "dur": 28.951, + "args": { + "External id": 247651,"Sequence number": 2987607, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 6482 + } + }, + { + "ph": "s", "id": 137, "pid": 4183438, "tid": 4183438, "ts": 667918009112.951, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918009178.129, "dur": 221.137, + "args": { + "External id": 247652,"Sequence number": 2987608, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1], [], [], [], [], [], []], "Input Dims": [[16, 4096, 1024], [1024], [], [], [], [], [], []], "Ev Idx": 6483 + } + }, + { + "ph": "s", "id": 136, "pid": 4183438, "tid": 4183438, "ts": 667918009178.129, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918009217.872, "dur": 4.744, + "args": { + "External id": 247653,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918009219.354, "dur": 2.919, + "args": { + "External id": 247654,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918009231.000, "dur": 8.722, + "args": { + "External id": 247655,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 6486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918009233.968, "dur": 5.258, + "args": { + "External id": 247656,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918009246.128, "dur": 3.721, + "args": { + "External id": 247657,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918009381.793, "dur": 5.703, + "args": { + "External id": 247658,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918009384.787, "dur": 2.477, + "args": { + "External id": 247659,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918009417.855, "dur": 88.286, + "args": { + "External id": 247660,"Sequence number": 2987609, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [1024, 1024], []], "Ev Idx": 6491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918009419.219, "dur": 7.628, + "args": { + "External id": 247661,"Sequence number": 2987609, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 6492 + } + }, + { + "ph": "s", "id": 135, "pid": 4183438, "tid": 4183438, "ts": 667918009419.219, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918009421.677, "dur": 4.042, + "args": { + "External id": 247662,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 6493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918009424.193, "dur": 1.320, + "args": { + "External id": 247663,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 6494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918009427.841, "dur": 78.053, + "args": { + "External id": 247664,"Sequence number": 2987610, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 1024]], "Ev Idx": 6495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918009430.090, "dur": 4.477, + "args": { + "External id": 247665,"Sequence number": 2987610, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918009431.226, "dur": 3.215, + "args": { + "External id": 247666,"Sequence number": 2987610, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6497 + } + }, + { + "ph": "s", "id": 134, "pid": 4183438, "tid": 4183438, "ts": 667918009431.226, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918009435.298, "dur": 65.276, + "args": { + "External id": 247667,"Sequence number": 2987611, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 6498 + } + }, + { + "ph": "s", "id": 133, "pid": 4183438, "tid": 4183438, "ts": 667918009435.298, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918009503.277, "dur": 1.990, + "args": { + "External id": 247668,"Sequence number": 2987612, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6499 + } + }, + { + "ph": "s", "id": 132, "pid": 4183438, "tid": 4183438, "ts": 667918009503.277, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918009514.838, "dur": 65.362, + "args": { + "External id": 247669,"Sequence number": 2987613, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [1024, 1024], []], "Ev Idx": 6500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918009515.714, "dur": 7.958, + "args": { + "External id": 247670,"Sequence number": 2987613, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 6501 + } + }, + { + "ph": "s", "id": 131, "pid": 4183438, "tid": 4183438, "ts": 667918009515.714, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918009519.667, "dur": 2.851, + "args": { + "External id": 247671,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 6502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918009521.350, "dur": 1.037, + "args": { + "External id": 247672,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 6503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918009524.332, "dur": 55.631, + "args": { + "External id": 247673,"Sequence number": 2987614, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 1024]], "Ev Idx": 6504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918009526.025, "dur": 6.371, + "args": { + "External id": 247674,"Sequence number": 2987614, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918009527.343, "dur": 4.890, + "args": { + "External id": 247675,"Sequence number": 2987614, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6506 + } + }, + { + "ph": "s", "id": 130, "pid": 4183438, "tid": 4183438, "ts": 667918009527.343, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918009533.104, "dur": 40.354, + "args": { + "External id": 247676,"Sequence number": 2987615, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 6507 + } + }, + { + "ph": "s", "id": 129, "pid": 4183438, "tid": 4183438, "ts": 667918009533.104, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918009575.378, "dur": 4.171, + "args": { + "External id": 247677,"Sequence number": 2987616, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6508 + } + }, + { + "ph": "s", "id": 128, "pid": 4183438, "tid": 4183438, "ts": 667918009575.378, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918009587.955, "dur": 58.077, + "args": { + "External id": 247678,"Sequence number": 2987617, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [1024, 1024], []], "Ev Idx": 6509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918009588.741, "dur": 5.890, + "args": { + "External id": 247679,"Sequence number": 2987617, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 6510 + } + }, + { + "ph": "s", "id": 127, "pid": 4183438, "tid": 4183438, "ts": 667918009588.741, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918009590.364, "dur": 3.045, + "args": { + "External id": 247680,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 6511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918009592.474, "dur": 0.766, + "args": { + "External id": 247681,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 6512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918009595.274, "dur": 50.572, + "args": { + "External id": 247682,"Sequence number": 2987618, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 1024]], "Ev Idx": 6513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918009596.667, "dur": 5.158, + "args": { + "External id": 247683,"Sequence number": 2987618, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918009598.085, "dur": 3.602, + "args": { + "External id": 247684,"Sequence number": 2987618, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6515 + } + }, + { + "ph": "s", "id": 126, "pid": 4183438, "tid": 4183438, "ts": 667918009598.085, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918009602.598, "dur": 38.043, + "args": { + "External id": 247685,"Sequence number": 2987619, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 6516 + } + }, + { + "ph": "s", "id": 125, "pid": 4183438, "tid": 4183438, "ts": 667918009602.598, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918009642.600, "dur": 2.837, + "args": { + "External id": 247686,"Sequence number": 2987620, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6517 + } + }, + { + "ph": "s", "id": 124, "pid": 4183438, "tid": 4183438, "ts": 667918009642.600, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918009700.676, "dur": 5.759, + "args": { + "External id": 247687,"Sequence number": 2987621, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918009701.682, "dur": 4.486, + "args": { + "External id": 247688,"Sequence number": 2987621, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6519 + } + }, + { + "ph": "s", "id": 123, "pid": 4183438, "tid": 4183438, "ts": 667918009701.682, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918009713.826, "dur": 5.690, + "args": { + "External id": 247689,"Sequence number": 2987622, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918009715.039, "dur": 4.329, + "args": { + "External id": 247690,"Sequence number": 2987622, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6521 + } + }, + { + "ph": "s", "id": 122, "pid": 4183438, "tid": 4183438, "ts": 667918009715.039, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918009723.735, "dur": 4.711, + "args": { + "External id": 247691,"Sequence number": 2987623, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918009724.687, "dur": 3.611, + "args": { + "External id": 247692,"Sequence number": 2987623, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6523 + } + }, + { + "ph": "s", "id": 121, "pid": 4183438, "tid": 4183438, "ts": 667918009724.687, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918009759.967, "dur": 159.656, + "args": { + "External id": 247693,"Sequence number": 2987624, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 6524 + } + }, + { + "ph": "s", "id": 120, "pid": 4183438, "tid": 4183438, "ts": 667918009759.967, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918009780.489, "dur": 9.211, + "args": { + "External id": 247694,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918009783.154, "dur": 6.055, + "args": { + "External id": 247695,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918009932.193, "dur": 102.626, + "args": { + "External id": 247696,"Sequence number": 2987625, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 6527 + } + }, + { + "ph": "s", "id": 119, "pid": 4183438, "tid": 4183438, "ts": 667918009932.193, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918009945.824, "dur": 7.457, + "args": { + "External id": 247697,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918009948.441, "dur": 4.524, + "args": { + "External id": 247698,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 4183438, "tid": 4183438, + "ts": 667918010063.004, "dur": 202.160, + "args": { + "External id": 247699,"Sequence number": 2987626, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], []], "Ev Idx": 6530 + } + }, + { + "ph": "s", "id": 118, "pid": 4183438, "tid": 4183438, "ts": 667918010063.004, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667918010090.536, "dur": 146.502, + "args": { + "External id": 247700,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918010140.588, "dur": 7.344, + "args": { + "External id": 247701,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918010143.236, "dur": 4.086, + "args": { + "External id": 247702,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918010150.579, "dur": 4.025, + "args": { + "External id": 247703,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918010156.274, "dur": 1.146, + "args": { + "External id": 247704,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918010162.344, "dur": 3.218, + "args": { + "External id": 247705,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 4183438, + "ts": 667918010249.388, "dur": 5.033, + "args": { + "External id": 247706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 6537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918010270.573, "dur": 6.434, + "args": { + "External id": 247707,"Sequence number": 2987627, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 6538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918010272.390, "dur": 4.437, + "args": { + "External id": 247708,"Sequence number": 2987627, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 6539 + } + }, + { + "ph": "s", "id": 117, "pid": 4183438, "tid": 4183438, "ts": 667918010272.390, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918010288.234, "dur": 110.350, + "args": { + "External id": 247709,"Sequence number": 2987628, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [1024, 1024], []], "Ev Idx": 6540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918010289.807, "dur": 9.258, + "args": { + "External id": 247710,"Sequence number": 2987628, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 6541 + } + }, + { + "ph": "s", "id": 116, "pid": 4183438, "tid": 4183438, "ts": 667918010289.807, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918010293.394, "dur": 4.571, + "args": { + "External id": 247711,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 6542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918010296.149, "dur": 1.552, + "args": { + "External id": 247712,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 6543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918010300.502, "dur": 97.799, + "args": { + "External id": 247713,"Sequence number": 2987629, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 1024]], "Ev Idx": 6544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918010302.763, "dur": 3.872, + "args": { + "External id": 247714,"Sequence number": 2987629, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918010304.196, "dur": 2.305, + "args": { + "External id": 247715,"Sequence number": 2987629, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6546 + } + }, + { + "ph": "s", "id": 115, "pid": 4183438, "tid": 4183438, "ts": 667918010304.196, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918010307.875, "dur": 84.233, + "args": { + "External id": 247716,"Sequence number": 2987630, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 6547 + } + }, + { + "ph": "s", "id": 114, "pid": 4183438, "tid": 4183438, "ts": 667918010307.875, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918010394.282, "dur": 3.354, + "args": { + "External id": 247717,"Sequence number": 2987631, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6548 + } + }, + { + "ph": "s", "id": 113, "pid": 4183438, "tid": 4183438, "ts": 667918010394.282, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918010433.068, "dur": 206.387, + "args": { + "External id": 247718,"Sequence number": 2987632, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1], [], [4194304, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 1024], [1024], [], [16, 4096, 1024], [], [], [], []], "Ev Idx": 6549 + } + }, + { + "ph": "s", "id": 112, "pid": 4183438, "tid": 4183438, "ts": 667918010433.068, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918010452.293, "dur": 3.073, + "args": { + "External id": 247719,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918010453.780, "dur": 1.416, + "args": { + "External id": 247720,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 4183438, "tid": 4183438, + "ts": 667918010458.930, "dur": 4.642, + "args": { + "External id": 247721,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1024, 1]], "Input Dims": [[16, 4096, 1024], [65536, 1024]], "Ev Idx": 6552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918010461.038, "dur": 2.427, + "args": { + "External id": 247722,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918010462.216, "dur": 1.148, + "args": { + "External id": 247723,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918010471.037, "dur": 7.368, + "args": { + "External id": 247724,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 6555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918010473.580, "dur": 4.507, + "args": { + "External id": 247725,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918010485.140, "dur": 3.341, + "args": { + "External id": 247726,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918010492.281, "dur": 2.818, + "args": { + "External id": 247727,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918010617.235, "dur": 5.695, + "args": { + "External id": 247728,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918010620.778, "dur": 1.892, + "args": { + "External id": 247729,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918010625.667, "dur": 2.990, + "args": { + "External id": 247730,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918010626.967, "dur": 1.361, + "args": { + "External id": 247731,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918010693.066, "dur": 118.076, + "args": { + "External id": 247732,"Sequence number": 2987633, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [2816, 1024], []], "Ev Idx": 6563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918010694.500, "dur": 9.146, + "args": { + "External id": 247733,"Sequence number": 2987633, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 6564 + } + }, + { + "ph": "s", "id": 111, "pid": 4183438, "tid": 4183438, "ts": 667918010694.500, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918010697.532, "dur": 4.507, + "args": { + "External id": 247734,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 6565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918010700.215, "dur": 1.420, + "args": { + "External id": 247735,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 6566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918010704.606, "dur": 106.170, + "args": { + "External id": 247736,"Sequence number": 2987634, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 2816]], "Ev Idx": 6567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918010706.566, "dur": 6.489, + "args": { + "External id": 247737,"Sequence number": 2987634, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918010707.979, "dur": 4.865, + "args": { + "External id": 247738,"Sequence number": 2987634, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6569 + } + }, + { + "ph": "s", "id": 110, "pid": 4183438, "tid": 4183438, "ts": 667918010707.979, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918010713.830, "dur": 72.889, + "args": { + "External id": 247739,"Sequence number": 2987635, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 2816]], "Ev Idx": 6570 + } + }, + { + "ph": "s", "id": 109, "pid": 4183438, "tid": 4183438, "ts": 667918010713.830, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918010803.615, "dur": 6.444, + "args": { + "External id": 247740,"Sequence number": 2987636, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1], []], "Input Dims": [[65536, 2816], []], "Ev Idx": 6571 + } + }, + { + "ph": "s", "id": 108, "pid": 4183438, "tid": 4183438, "ts": 667918010803.615, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918010821.579, "dur": 66.103, + "args": { + "External id": 247741,"Sequence number": 2987637, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [2816, 1024], []], "Ev Idx": 6572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918010822.262, "dur": 6.020, + "args": { + "External id": 247742,"Sequence number": 2987637, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 6573 + } + }, + { + "ph": "s", "id": 107, "pid": 4183438, "tid": 4183438, "ts": 667918010822.262, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918010824.116, "dur": 3.008, + "args": { + "External id": 247743,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 6574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918010826.062, "dur": 0.909, + "args": { + "External id": 247744,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 6575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918010829.268, "dur": 58.203, + "args": { + "External id": 247745,"Sequence number": 2987638, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 2816]], "Ev Idx": 6576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918010830.778, "dur": 5.128, + "args": { + "External id": 247746,"Sequence number": 2987638, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918010831.840, "dur": 3.925, + "args": { + "External id": 247747,"Sequence number": 2987638, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6578 + } + }, + { + "ph": "s", "id": 106, "pid": 4183438, "tid": 4183438, "ts": 667918010831.840, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918010836.378, "dur": 46.819, + "args": { + "External id": 247748,"Sequence number": 2987639, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 2816]], "Ev Idx": 6579 + } + }, + { + "ph": "s", "id": 105, "pid": 4183438, "tid": 4183438, "ts": 667918010836.378, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918010885.282, "dur": 1.841, + "args": { + "External id": 247749,"Sequence number": 2987640, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1], []], "Input Dims": [[65536, 2816], []], "Ev Idx": 6580 + } + }, + { + "ph": "s", "id": 104, "pid": 4183438, "tid": 4183438, "ts": 667918010885.282, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918010911.520, "dur": 152.214, + "args": { + "External id": 247750,"Sequence number": 2987641, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[11534336, 2816, 1], [11534336, 2816, 1], [2816, 1], []], "Input Dims": [[16, 4096, 2816], [16, 4096, 2816], [1024, 2816], []], "Ev Idx": 6581 + } + }, + { + "ph": "s", "id": 103, "pid": 4183438, "tid": 4183438, "ts": 667918010911.520, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918010951.139, "dur": 4.521, + "args": { + "External id": 247751,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2816]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918010989.033, "dur": 59.074, + "args": { + "External id": 247752,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[11534336, 2816, 1], [2816, 1], []], "Input Dims": [[16, 4096, 2816], [1024, 2816], []], "Ev Idx": 6583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918010989.766, "dur": 7.599, + "args": { + "External id": 247753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 6584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918010990.960, "dur": 5.360, + "args": { + "External id": 247754,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2816, 1], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 6585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918010995.278, "dur": 0.850, + "args": { + "External id": 247755,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 6586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918010998.221, "dur": 49.386, + "args": { + "External id": 247756,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[11534336, 2816, 1], [1, 2816]], "Input Dims": [[16, 4096, 2816], [2816, 1024]], "Ev Idx": 6587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918011000.066, "dur": 2.336, + "args": { + "External id": 247757,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 6588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918011001.052, "dur": 1.234, + "args": { + "External id": 247758,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 6589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918011003.417, "dur": 40.622, + "args": { + "External id": 247759,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816]], "Input Dims": [[65536, 2816], [2816, 1024]], "Ev Idx": 6590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918011045.963, "dur": 1.005, + "args": { + "External id": 247760,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 4183438, + "ts": 667918011072.686, "dur": 24.503, + "args": { + "External id": 247761,"Sequence number": 2987642, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 6592 + } + }, + { + "ph": "s", "id": 102, "pid": 4183438, "tid": 4183438, "ts": 667918011072.686, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918011134.666, "dur": 222.079, + "args": { + "External id": 247762,"Sequence number": 2987643, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1], [], [], [], [], [], []], "Input Dims": [[16, 4096, 1024], [1024], [], [], [], [], [], []], "Ev Idx": 6593 + } + }, + { + "ph": "s", "id": 101, "pid": 4183438, "tid": 4183438, "ts": 667918011134.666, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918011153.990, "dur": 3.760, + "args": { + "External id": 247763,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918011155.126, "dur": 2.141, + "args": { + "External id": 247764,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918011165.493, "dur": 8.475, + "args": { + "External id": 247765,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 6596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918011168.775, "dur": 4.734, + "args": { + "External id": 247766,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918011180.364, "dur": 3.960, + "args": { + "External id": 247767,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918011337.258, "dur": 4.447, + "args": { + "External id": 247768,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918011338.490, "dur": 2.918, + "args": { + "External id": 247769,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918011377.124, "dur": 92.588, + "args": { + "External id": 247770,"Sequence number": 2987644, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [1024, 1024], []], "Ev Idx": 6601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918011378.715, "dur": 8.674, + "args": { + "External id": 247771,"Sequence number": 2987644, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 6602 + } + }, + { + "ph": "s", "id": 100, "pid": 4183438, "tid": 4183438, "ts": 667918011378.715, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918011381.717, "dur": 4.107, + "args": { + "External id": 247772,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 6603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918011384.093, "dur": 1.548, + "args": { + "External id": 247773,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 6604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918011388.476, "dur": 80.968, + "args": { + "External id": 247774,"Sequence number": 2987645, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 1024]], "Ev Idx": 6605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918011390.625, "dur": 4.997, + "args": { + "External id": 247775,"Sequence number": 2987645, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918011391.920, "dur": 3.555, + "args": { + "External id": 247776,"Sequence number": 2987645, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6607 + } + }, + { + "ph": "s", "id": 99, "pid": 4183438, "tid": 4183438, "ts": 667918011391.920, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918011396.625, "dur": 62.876, + "args": { + "External id": 247777,"Sequence number": 2987646, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 6608 + } + }, + { + "ph": "s", "id": 98, "pid": 4183438, "tid": 4183438, "ts": 667918011396.625, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918011461.756, "dur": 6.988, + "args": { + "External id": 247778,"Sequence number": 2987647, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6609 + } + }, + { + "ph": "s", "id": 97, "pid": 4183438, "tid": 4183438, "ts": 667918011461.756, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918011478.078, "dur": 61.031, + "args": { + "External id": 247779,"Sequence number": 2987648, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [1024, 1024], []], "Ev Idx": 6610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918011478.788, "dur": 6.395, + "args": { + "External id": 247780,"Sequence number": 2987648, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 6611 + } + }, + { + "ph": "s", "id": 96, "pid": 4183438, "tid": 4183438, "ts": 667918011478.788, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918011480.834, "dur": 3.056, + "args": { + "External id": 247781,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 6612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918011482.755, "dur": 0.988, + "args": { + "External id": 247782,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 6613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918011485.943, "dur": 52.990, + "args": { + "External id": 247783,"Sequence number": 2987649, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 1024]], "Ev Idx": 6614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918011487.345, "dur": 3.708, + "args": { + "External id": 247784,"Sequence number": 2987649, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918011488.266, "dur": 2.654, + "args": { + "External id": 247785,"Sequence number": 2987649, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6616 + } + }, + { + "ph": "s", "id": 95, "pid": 4183438, "tid": 4183438, "ts": 667918011488.266, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918011491.587, "dur": 43.200, + "args": { + "External id": 247786,"Sequence number": 2987650, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 6617 + } + }, + { + "ph": "s", "id": 94, "pid": 4183438, "tid": 4183438, "ts": 667918011491.587, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918011536.626, "dur": 1.912, + "args": { + "External id": 247787,"Sequence number": 2987651, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6618 + } + }, + { + "ph": "s", "id": 93, "pid": 4183438, "tid": 4183438, "ts": 667918011536.626, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918011545.641, "dur": 55.469, + "args": { + "External id": 247788,"Sequence number": 2987652, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [1024, 1024], []], "Ev Idx": 6619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918011546.331, "dur": 5.785, + "args": { + "External id": 247789,"Sequence number": 2987652, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 6620 + } + }, + { + "ph": "s", "id": 92, "pid": 4183438, "tid": 4183438, "ts": 667918011546.331, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918011548.169, "dur": 2.878, + "args": { + "External id": 247790,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 6621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918011550.019, "dur": 0.835, + "args": { + "External id": 247791,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 6622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918011552.600, "dur": 48.331, + "args": { + "External id": 247792,"Sequence number": 2987653, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 1024]], "Ev Idx": 6623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918011554.043, "dur": 4.687, + "args": { + "External id": 247793,"Sequence number": 2987653, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918011555.103, "dur": 3.488, + "args": { + "External id": 247794,"Sequence number": 2987653, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6625 + } + }, + { + "ph": "s", "id": 91, "pid": 4183438, "tid": 4183438, "ts": 667918011555.103, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918011559.463, "dur": 37.440, + "args": { + "External id": 247795,"Sequence number": 2987654, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 6626 + } + }, + { + "ph": "s", "id": 90, "pid": 4183438, "tid": 4183438, "ts": 667918011559.463, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918011598.501, "dur": 2.055, + "args": { + "External id": 247796,"Sequence number": 2987655, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6627 + } + }, + { + "ph": "s", "id": 89, "pid": 4183438, "tid": 4183438, "ts": 667918011598.501, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918011615.929, "dur": 4.312, + "args": { + "External id": 247797,"Sequence number": 2987656, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918011617.049, "dur": 3.013, + "args": { + "External id": 247798,"Sequence number": 2987656, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6629 + } + }, + { + "ph": "s", "id": 88, "pid": 4183438, "tid": 4183438, "ts": 667918011617.049, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918011627.146, "dur": 5.393, + "args": { + "External id": 247799,"Sequence number": 2987657, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918011628.123, "dur": 4.257, + "args": { + "External id": 247800,"Sequence number": 2987657, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6631 + } + }, + { + "ph": "s", "id": 87, "pid": 4183438, "tid": 4183438, "ts": 667918011628.123, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918011636.933, "dur": 3.876, + "args": { + "External id": 247801,"Sequence number": 2987658, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918011638.803, "dur": 1.862, + "args": { + "External id": 247802,"Sequence number": 2987658, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6633 + } + }, + { + "ph": "s", "id": 86, "pid": 4183438, "tid": 4183438, "ts": 667918011638.803, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918011709.947, "dur": 163.298, + "args": { + "External id": 247803,"Sequence number": 2987659, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 6634 + } + }, + { + "ph": "s", "id": 85, "pid": 4183438, "tid": 4183438, "ts": 667918011709.947, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918011732.054, "dur": 9.414, + "args": { + "External id": 247804,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918011735.009, "dur": 5.684, + "args": { + "External id": 247805,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918011885.703, "dur": 104.435, + "args": { + "External id": 247806,"Sequence number": 2987660, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 6637 + } + }, + { + "ph": "s", "id": 84, "pid": 4183438, "tid": 4183438, "ts": 667918011885.703, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918011899.678, "dur": 6.562, + "args": { + "External id": 247807,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918011901.712, "dur": 4.185, + "args": { + "External id": 247808,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 4183438, "tid": 4183438, + "ts": 667918012051.896, "dur": 204.150, + "args": { + "External id": 247809,"Sequence number": 2987661, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], []], "Ev Idx": 6640 + } + }, + { + "ph": "s", "id": 83, "pid": 4183438, "tid": 4183438, "ts": 667918012051.896, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667918012078.875, "dur": 146.239, + "args": { + "External id": 247810,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918012129.167, "dur": 9.332, + "args": { + "External id": 247811,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918012131.472, "dur": 6.450, + "args": { + "External id": 247812,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918012143.713, "dur": 3.851, + "args": { + "External id": 247813,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918012148.958, "dur": 1.391, + "args": { + "External id": 247814,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918012153.012, "dur": 3.095, + "args": { + "External id": 247815,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 4183438, + "ts": 667918012238.329, "dur": 5.913, + "args": { + "External id": 247816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 6647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918012261.521, "dur": 6.017, + "args": { + "External id": 247817,"Sequence number": 2987662, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 6648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918012263.032, "dur": 4.369, + "args": { + "External id": 247818,"Sequence number": 2987662, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 6649 + } + }, + { + "ph": "s", "id": 82, "pid": 4183438, "tid": 4183438, "ts": 667918012263.032, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918012281.130, "dur": 115.413, + "args": { + "External id": 247819,"Sequence number": 2987663, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [1024, 1024], []], "Ev Idx": 6650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918012282.436, "dur": 11.457, + "args": { + "External id": 247820,"Sequence number": 2987663, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 6651 + } + }, + { + "ph": "s", "id": 81, "pid": 4183438, "tid": 4183438, "ts": 667918012282.436, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918012288.023, "dur": 4.616, + "args": { + "External id": 247821,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 6652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918012290.923, "dur": 1.465, + "args": { + "External id": 247822,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 6653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918012295.336, "dur": 100.822, + "args": { + "External id": 247823,"Sequence number": 2987664, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 1024]], "Ev Idx": 6654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918012297.436, "dur": 3.492, + "args": { + "External id": 247824,"Sequence number": 2987664, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918012298.825, "dur": 1.964, + "args": { + "External id": 247825,"Sequence number": 2987664, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6656 + } + }, + { + "ph": "s", "id": 80, "pid": 4183438, "tid": 4183438, "ts": 667918012298.825, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918012302.505, "dur": 85.622, + "args": { + "External id": 247826,"Sequence number": 2987665, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 6657 + } + }, + { + "ph": "s", "id": 79, "pid": 4183438, "tid": 4183438, "ts": 667918012302.505, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918012390.293, "dur": 5.243, + "args": { + "External id": 247827,"Sequence number": 2987666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6658 + } + }, + { + "ph": "s", "id": 78, "pid": 4183438, "tid": 4183438, "ts": 667918012390.293, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918012430.907, "dur": 206.847, + "args": { + "External id": 247828,"Sequence number": 2987667, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1], [], [4194304, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 1024], [1024], [], [16, 4096, 1024], [], [], [], []], "Ev Idx": 6659 + } + }, + { + "ph": "s", "id": 77, "pid": 4183438, "tid": 4183438, "ts": 667918012430.907, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918012448.834, "dur": 3.203, + "args": { + "External id": 247829,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918012450.084, "dur": 1.691, + "args": { + "External id": 247830,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 4183438, "tid": 4183438, + "ts": 667918012455.900, "dur": 4.346, + "args": { + "External id": 247831,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1024, 1]], "Input Dims": [[16, 4096, 1024], [65536, 1024]], "Ev Idx": 6662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918012457.608, "dur": 2.527, + "args": { + "External id": 247832,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918012458.859, "dur": 1.160, + "args": { + "External id": 247833,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918012468.498, "dur": 7.490, + "args": { + "External id": 247834,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 6665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918012470.962, "dur": 4.677, + "args": { + "External id": 247835,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918012482.382, "dur": 3.395, + "args": { + "External id": 247836,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918012489.569, "dur": 3.197, + "args": { + "External id": 247837,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918012616.173, "dur": 3.800, + "args": { + "External id": 247838,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918012617.436, "dur": 2.350, + "args": { + "External id": 247839,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918012622.669, "dur": 2.908, + "args": { + "External id": 247840,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918012624.167, "dur": 1.291, + "args": { + "External id": 247841,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918012691.637, "dur": 103.673, + "args": { + "External id": 247842,"Sequence number": 2987668, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [2816, 1024], []], "Ev Idx": 6673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918012693.468, "dur": 9.504, + "args": { + "External id": 247843,"Sequence number": 2987668, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 6674 + } + }, + { + "ph": "s", "id": 76, "pid": 4183438, "tid": 4183438, "ts": 667918012693.468, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918012696.387, "dur": 5.073, + "args": { + "External id": 247844,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 6675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918012699.191, "dur": 1.714, + "args": { + "External id": 247845,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 6676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918012704.203, "dur": 90.751, + "args": { + "External id": 247846,"Sequence number": 2987669, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 2816]], "Ev Idx": 6677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918012706.509, "dur": 4.899, + "args": { + "External id": 247847,"Sequence number": 2987669, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918012708.291, "dur": 2.979, + "args": { + "External id": 247848,"Sequence number": 2987669, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6679 + } + }, + { + "ph": "s", "id": 75, "pid": 4183438, "tid": 4183438, "ts": 667918012708.291, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918012712.633, "dur": 73.937, + "args": { + "External id": 247849,"Sequence number": 2987670, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 2816]], "Ev Idx": 6680 + } + }, + { + "ph": "s", "id": 74, "pid": 4183438, "tid": 4183438, "ts": 667918012712.633, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918012788.959, "dur": 5.401, + "args": { + "External id": 247850,"Sequence number": 2987671, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1], []], "Input Dims": [[65536, 2816], []], "Ev Idx": 6681 + } + }, + { + "ph": "s", "id": 73, "pid": 4183438, "tid": 4183438, "ts": 667918012788.959, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918012805.676, "dur": 69.107, + "args": { + "External id": 247851,"Sequence number": 2987672, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [2816, 1024], []], "Ev Idx": 6682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918012806.822, "dur": 6.071, + "args": { + "External id": 247852,"Sequence number": 2987672, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 6683 + } + }, + { + "ph": "s", "id": 72, "pid": 4183438, "tid": 4183438, "ts": 667918012806.822, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918012808.478, "dur": 3.085, + "args": { + "External id": 247853,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 6684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918012810.436, "dur": 0.980, + "args": { + "External id": 247854,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 6685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918012813.803, "dur": 60.743, + "args": { + "External id": 247855,"Sequence number": 2987673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 2816]], "Ev Idx": 6686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918012815.150, "dur": 7.451, + "args": { + "External id": 247856,"Sequence number": 2987673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918012816.291, "dur": 6.147, + "args": { + "External id": 247857,"Sequence number": 2987673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6688 + } + }, + { + "ph": "s", "id": 71, "pid": 4183438, "tid": 4183438, "ts": 667918012816.291, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918012823.101, "dur": 43.547, + "args": { + "External id": 247858,"Sequence number": 2987674, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 2816]], "Ev Idx": 6689 + } + }, + { + "ph": "s", "id": 70, "pid": 4183438, "tid": 4183438, "ts": 667918012823.101, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918012868.589, "dur": 5.610, + "args": { + "External id": 247859,"Sequence number": 2987675, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1], []], "Input Dims": [[65536, 2816], []], "Ev Idx": 6690 + } + }, + { + "ph": "s", "id": 69, "pid": 4183438, "tid": 4183438, "ts": 667918012868.589, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918012895.859, "dur": 140.069, + "args": { + "External id": 247860,"Sequence number": 2987676, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[11534336, 2816, 1], [11534336, 2816, 1], [2816, 1], []], "Input Dims": [[16, 4096, 2816], [16, 4096, 2816], [1024, 2816], []], "Ev Idx": 6691 + } + }, + { + "ph": "s", "id": 68, "pid": 4183438, "tid": 4183438, "ts": 667918012895.859, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918012934.291, "dur": 3.989, + "args": { + "External id": 247861,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2816]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918012968.128, "dur": 54.964, + "args": { + "External id": 247862,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[11534336, 2816, 1], [2816, 1], []], "Input Dims": [[16, 4096, 2816], [1024, 2816], []], "Ev Idx": 6693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918012969.011, "dur": 5.437, + "args": { + "External id": 247863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 6694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918012970.319, "dur": 3.446, + "args": { + "External id": 247864,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2816, 1], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 6695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918012972.644, "dur": 0.960, + "args": { + "External id": 247865,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 6696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918012975.116, "dur": 47.625, + "args": { + "External id": 247866,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[11534336, 2816, 1], [1, 2816]], "Input Dims": [[16, 4096, 2816], [2816, 1024]], "Ev Idx": 6697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918012976.685, "dur": 2.992, + "args": { + "External id": 247867,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 6698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918012977.714, "dur": 1.841, + "args": { + "External id": 247868,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 6699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918012980.691, "dur": 38.076, + "args": { + "External id": 247869,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816]], "Input Dims": [[65536, 2816], [2816, 1024]], "Ev Idx": 6700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918013020.725, "dur": 1.429, + "args": { + "External id": 247870,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 4183438, + "ts": 667918013043.592, "dur": 25.819, + "args": { + "External id": 247871,"Sequence number": 2987677, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 6702 + } + }, + { + "ph": "s", "id": 67, "pid": 4183438, "tid": 4183438, "ts": 667918013043.592, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918013102.705, "dur": 208.153, + "args": { + "External id": 247872,"Sequence number": 2987678, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1], [], [], [], [], [], []], "Input Dims": [[16, 4096, 1024], [1024], [], [], [], [], [], []], "Ev Idx": 6703 + } + }, + { + "ph": "s", "id": 66, "pid": 4183438, "tid": 4183438, "ts": 667918013102.705, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918013121.090, "dur": 3.872, + "args": { + "External id": 247873,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918013123.000, "dur": 1.787, + "args": { + "External id": 247874,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918013132.467, "dur": 7.659, + "args": { + "External id": 247875,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 6706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918013135.307, "dur": 4.467, + "args": { + "External id": 247876,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918013146.353, "dur": 3.383, + "args": { + "External id": 247877,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918013292.670, "dur": 5.032, + "args": { + "External id": 247878,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918013294.799, "dur": 2.611, + "args": { + "External id": 247879,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918013330.699, "dur": 97.136, + "args": { + "External id": 247880,"Sequence number": 2987679, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [1024, 1024], []], "Ev Idx": 6711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918013331.752, "dur": 8.875, + "args": { + "External id": 247881,"Sequence number": 2987679, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 6712 + } + }, + { + "ph": "s", "id": 65, "pid": 4183438, "tid": 4183438, "ts": 667918013331.752, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918013334.889, "dur": 4.311, + "args": { + "External id": 247882,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 6713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918013337.481, "dur": 1.518, + "args": { + "External id": 247883,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 6714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918013341.977, "dur": 85.632, + "args": { + "External id": 247884,"Sequence number": 2987680, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 1024]], "Ev Idx": 6715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918013343.974, "dur": 4.778, + "args": { + "External id": 247885,"Sequence number": 2987680, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918013345.242, "dur": 3.381, + "args": { + "External id": 247886,"Sequence number": 2987680, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6717 + } + }, + { + "ph": "s", "id": 64, "pid": 4183438, "tid": 4183438, "ts": 667918013345.242, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918013349.710, "dur": 71.523, + "args": { + "External id": 247887,"Sequence number": 2987681, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 6718 + } + }, + { + "ph": "s", "id": 63, "pid": 4183438, "tid": 4183438, "ts": 667918013349.710, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918013423.640, "dur": 3.290, + "args": { + "External id": 247888,"Sequence number": 2987682, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6719 + } + }, + { + "ph": "s", "id": 62, "pid": 4183438, "tid": 4183438, "ts": 667918013423.640, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918013436.063, "dur": 62.061, + "args": { + "External id": 247889,"Sequence number": 2987683, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [1024, 1024], []], "Ev Idx": 6720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918013437.165, "dur": 6.282, + "args": { + "External id": 247890,"Sequence number": 2987683, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 6721 + } + }, + { + "ph": "s", "id": 61, "pid": 4183438, "tid": 4183438, "ts": 667918013437.165, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918013438.660, "dur": 3.819, + "args": { + "External id": 247891,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 6722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918013441.461, "dur": 0.822, + "args": { + "External id": 247892,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 6723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918013444.242, "dur": 53.698, + "args": { + "External id": 247893,"Sequence number": 2987684, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 1024]], "Ev Idx": 6724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918013445.974, "dur": 3.800, + "args": { + "External id": 247894,"Sequence number": 2987684, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918013447.169, "dur": 2.478, + "args": { + "External id": 247895,"Sequence number": 2987684, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6726 + } + }, + { + "ph": "s", "id": 60, "pid": 4183438, "tid": 4183438, "ts": 667918013447.169, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918013450.649, "dur": 41.080, + "args": { + "External id": 247896,"Sequence number": 2987685, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 6727 + } + }, + { + "ph": "s", "id": 59, "pid": 4183438, "tid": 4183438, "ts": 667918013450.649, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918013493.637, "dur": 3.956, + "args": { + "External id": 247897,"Sequence number": 2987686, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6728 + } + }, + { + "ph": "s", "id": 58, "pid": 4183438, "tid": 4183438, "ts": 667918013493.637, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918013505.322, "dur": 68.090, + "args": { + "External id": 247898,"Sequence number": 2987687, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [1024, 1024], []], "Ev Idx": 6729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918013506.186, "dur": 7.708, + "args": { + "External id": 247899,"Sequence number": 2987687, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 6730 + } + }, + { + "ph": "s", "id": 57, "pid": 4183438, "tid": 4183438, "ts": 667918013506.186, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918013507.703, "dur": 5.142, + "args": { + "External id": 247900,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 6731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918013509.837, "dur": 2.781, + "args": { + "External id": 247901,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 6732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918013514.577, "dur": 58.608, + "args": { + "External id": 247902,"Sequence number": 2987688, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 1024]], "Ev Idx": 6733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918013516.647, "dur": 5.065, + "args": { + "External id": 247903,"Sequence number": 2987688, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918013517.967, "dur": 3.590, + "args": { + "External id": 247904,"Sequence number": 2987688, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6735 + } + }, + { + "ph": "s", "id": 56, "pid": 4183438, "tid": 4183438, "ts": 667918013517.967, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918013522.603, "dur": 44.120, + "args": { + "External id": 247905,"Sequence number": 2987689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 6736 + } + }, + { + "ph": "s", "id": 55, "pid": 4183438, "tid": 4183438, "ts": 667918013522.603, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918013568.403, "dur": 4.474, + "args": { + "External id": 247906,"Sequence number": 2987690, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6737 + } + }, + { + "ph": "s", "id": 54, "pid": 4183438, "tid": 4183438, "ts": 667918013568.403, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918013589.738, "dur": 4.196, + "args": { + "External id": 247907,"Sequence number": 2987691, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918013590.815, "dur": 2.751, + "args": { + "External id": 247908,"Sequence number": 2987691, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6739 + } + }, + { + "ph": "s", "id": 53, "pid": 4183438, "tid": 4183438, "ts": 667918013590.815, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918013600.688, "dur": 3.496, + "args": { + "External id": 247909,"Sequence number": 2987692, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918013601.878, "dur": 2.172, + "args": { + "External id": 247910,"Sequence number": 2987692, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6741 + } + }, + { + "ph": "s", "id": 52, "pid": 4183438, "tid": 4183438, "ts": 667918013601.878, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918013608.495, "dur": 3.528, + "args": { + "External id": 247911,"Sequence number": 2987693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918013609.735, "dur": 2.161, + "args": { + "External id": 247912,"Sequence number": 2987693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 16, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6743 + } + }, + { + "ph": "s", "id": 51, "pid": 4183438, "tid": 4183438, "ts": 667918013609.735, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918013641.673, "dur": 201.660, + "args": { + "External id": 247913,"Sequence number": 2987694, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 6744 + } + }, + { + "ph": "s", "id": 50, "pid": 4183438, "tid": 4183438, "ts": 667918013641.673, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918013698.196, "dur": 11.855, + "args": { + "External id": 247914,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918013701.116, "dur": 8.026, + "args": { + "External id": 247915,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918013865.267, "dur": 104.696, + "args": { + "External id": 247916,"Sequence number": 2987695, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 6747 + } + }, + { + "ph": "s", "id": 49, "pid": 4183438, "tid": 4183438, "ts": 667918013865.267, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918013880.944, "dur": 7.391, + "args": { + "External id": 247917,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918013883.272, "dur": 4.690, + "args": { + "External id": 247918,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 4183438, "tid": 4183438, + "ts": 667918013996.409, "dur": 182.398, + "args": { + "External id": 247919,"Sequence number": 2987696, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], [], []], "Ev Idx": 6750 + } + }, + { + "ph": "s", "id": 48, "pid": 4183438, "tid": 4183438, "ts": 667918013996.409, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 4183438, "tid": 4183438, + "ts": 667918014022.414, "dur": 130.673, + "args": { + "External id": 247920,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [4194304, 1024, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [16, 4096, 16, 64], [16, 4096, 16, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918014073.284, "dur": 7.761, + "args": { + "External id": 247921,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[4194304, 1024, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 16, 64], [], [], [], [], []], "Ev Idx": 6752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918014075.890, "dur": 4.743, + "args": { + "External id": 247922,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 16, 64]", "[4194304, 1024, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918014083.843, "dur": 4.224, + "args": { + "External id": 247923,"Record function id": 0, "Concrete Inputs": ["[16, 16, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918014089.675, "dur": 2.066, + "args": { + "External id": 247924,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918014094.499, "dur": 5.501, + "args": { + "External id": 247925,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 4183438, "tid": 4183438, + "ts": 667918014164.218, "dur": 4.925, + "args": { + "External id": 247926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 64, 1]], "Input Dims": [[16, 4096, 16, 64]], "Ev Idx": 6757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918014184.523, "dur": 5.540, + "args": { + "External id": 247927,"Sequence number": 2987697, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 6758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918014185.936, "dur": 4.000, + "args": { + "External id": 247928,"Sequence number": 2987697, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 64, 1], []], "Input Dims": [[16, 4096, 16, 64], []], "Ev Idx": 6759 + } + }, + { + "ph": "s", "id": 47, "pid": 4183438, "tid": 4183438, "ts": 667918014185.936, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918014220.184, "dur": 112.152, + "args": { + "External id": 247929,"Sequence number": 2987698, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [1024, 1024], []], "Ev Idx": 6760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918014221.603, "dur": 9.873, + "args": { + "External id": 247930,"Sequence number": 2987698, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[1024, 1024]], "Ev Idx": 6761 + } + }, + { + "ph": "s", "id": 46, "pid": 4183438, "tid": 4183438, "ts": 667918014221.603, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918014225.271, "dur": 4.826, + "args": { + "External id": 247931,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[1024, 1024], [], []], "Ev Idx": 6762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918014227.991, "dur": 1.683, + "args": { + "External id": 247932,"Record function id": 0, "Concrete Inputs": ["", "[1024, 1024]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[1024, 1024], [], [], []], "Ev Idx": 6763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918014233.088, "dur": 98.822, + "args": { + "External id": 247933,"Sequence number": 2987699, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 1024]], "Ev Idx": 6764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918014235.431, "dur": 4.134, + "args": { + "External id": 247934,"Sequence number": 2987699, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918014236.882, "dur": 2.548, + "args": { + "External id": 247935,"Sequence number": 2987699, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6766 + } + }, + { + "ph": "s", "id": 45, "pid": 4183438, "tid": 4183438, "ts": 667918014236.882, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918014240.842, "dur": 84.328, + "args": { + "External id": 247936,"Sequence number": 2987700, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 1024]], "Ev Idx": 6767 + } + }, + { + "ph": "s", "id": 44, "pid": 4183438, "tid": 4183438, "ts": 667918014240.842, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918014327.361, "dur": 3.848, + "args": { + "External id": 247937,"Sequence number": 2987701, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6768 + } + }, + { + "ph": "s", "id": 43, "pid": 4183438, "tid": 4183438, "ts": 667918014327.361, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918014367.728, "dur": 206.177, + "args": { + "External id": 247938,"Sequence number": 2987702, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 1], [1], [], [4194304, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 1024], [1024], [], [16, 4096, 1024], [], [], [], []], "Ev Idx": 6769 + } + }, + { + "ph": "s", "id": 42, "pid": 4183438, "tid": 4183438, "ts": 667918014367.728, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918014386.091, "dur": 3.192, + "args": { + "External id": 247939,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918014387.504, "dur": 1.644, + "args": { + "External id": 247940,"Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 4183438, "tid": 4183438, + "ts": 667918014393.000, "dur": 4.050, + "args": { + "External id": 247941,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1024, 1]], "Input Dims": [[16, 4096, 1024], [65536, 1024]], "Ev Idx": 6772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918014394.730, "dur": 2.215, + "args": { + "External id": 247942,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918014395.656, "dur": 1.180, + "args": { + "External id": 247943,"Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918014405.098, "dur": 7.668, + "args": { + "External id": 247944,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 6775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918014407.763, "dur": 4.650, + "args": { + "External id": 247945,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918014419.789, "dur": 3.418, + "args": { + "External id": 247946,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918014426.681, "dur": 3.353, + "args": { + "External id": 247947,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918014551.346, "dur": 5.740, + "args": { + "External id": 247948,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918014552.919, "dur": 3.936, + "args": { + "External id": 247949,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918014559.726, "dur": 2.619, + "args": { + "External id": 247950,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918014560.993, "dur": 1.131, + "args": { + "External id": 247951,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918014591.406, "dur": 135.844, + "args": { + "External id": 247952,"Sequence number": 2987703, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [2816, 1024], []], "Ev Idx": 6783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918014592.626, "dur": 6.984, + "args": { + "External id": 247953,"Sequence number": 2987703, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 6784 + } + }, + { + "ph": "s", "id": 41, "pid": 4183438, "tid": 4183438, "ts": 667918014592.626, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918014595.061, "dur": 3.492, + "args": { + "External id": 247954,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 6785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918014597.454, "dur": 0.887, + "args": { + "External id": 247955,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 6786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918014601.116, "dur": 125.682, + "args": { + "External id": 247956,"Sequence number": 2987704, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 2816]], "Ev Idx": 6787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918014602.728, "dur": 6.239, + "args": { + "External id": 247957,"Sequence number": 2987704, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918014603.938, "dur": 4.900, + "args": { + "External id": 247958,"Sequence number": 2987704, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6789 + } + }, + { + "ph": "s", "id": 40, "pid": 4183438, "tid": 4183438, "ts": 667918014603.938, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918014609.654, "dur": 108.169, + "args": { + "External id": 247959,"Sequence number": 2987705, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 2816]], "Ev Idx": 6790 + } + }, + { + "ph": "s", "id": 39, "pid": 4183438, "tid": 4183438, "ts": 667918014609.654, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918014721.984, "dur": 4.167, + "args": { + "External id": 247960,"Sequence number": 2987706, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1], []], "Input Dims": [[65536, 2816], []], "Ev Idx": 6791 + } + }, + { + "ph": "s", "id": 38, "pid": 4183438, "tid": 4183438, "ts": 667918014721.984, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918014737.624, "dur": 82.180, + "args": { + "External id": 247961,"Sequence number": 2987707, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 1], [1024, 1], []], "Input Dims": [[16, 4096, 1024], [2816, 1024], []], "Ev Idx": 6792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918014738.751, "dur": 12.487, + "args": { + "External id": 247962,"Sequence number": 2987707, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[2816, 1024]], "Ev Idx": 6793 + } + }, + { + "ph": "s", "id": 37, "pid": 4183438, "tid": 4183438, "ts": 667918014738.751, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918014743.714, "dur": 5.884, + "args": { + "External id": 247963,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[2816, 1024], [], []], "Ev Idx": 6794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918014748.220, "dur": 1.235, + "args": { + "External id": 247964,"Record function id": 0, "Concrete Inputs": ["", "[1024, 2816]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[2816, 1024], [], [], []], "Ev Idx": 6795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918014752.264, "dur": 67.281, + "args": { + "External id": 247965,"Sequence number": 2987708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4194304, 1024, 1], [1, 1024]], "Input Dims": [[16, 4096, 1024], [1024, 2816]], "Ev Idx": 6796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918014754.068, "dur": 5.807, + "args": { + "External id": 247966,"Sequence number": 2987708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918014755.433, "dur": 4.233, + "args": { + "External id": 247967,"Sequence number": 2987708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6798 + } + }, + { + "ph": "s", "id": 36, "pid": 4183438, "tid": 4183438, "ts": 667918014755.433, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918014760.531, "dur": 51.853, + "args": { + "External id": 247968,"Sequence number": 2987709, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[65536, 1024], [1024, 2816]], "Ev Idx": 6799 + } + }, + { + "ph": "s", "id": 35, "pid": 4183438, "tid": 4183438, "ts": 667918014760.531, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918014814.193, "dur": 5.008, + "args": { + "External id": 247969,"Sequence number": 2987710, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2816, 1], []], "Input Dims": [[65536, 2816], []], "Ev Idx": 6800 + } + }, + { + "ph": "s", "id": 34, "pid": 4183438, "tid": 4183438, "ts": 667918014814.193, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918014840.743, "dur": 152.067, + "args": { + "External id": 247970,"Sequence number": 2987711, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[11534336, 2816, 1], [11534336, 2816, 1], [2816, 1], []], "Input Dims": [[16, 4096, 2816], [16, 4096, 2816], [1024, 2816], []], "Ev Idx": 6801 + } + }, + { + "ph": "s", "id": 33, "pid": 4183438, "tid": 4183438, "ts": 667918014840.743, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918014879.889, "dur": 4.568, + "args": { + "External id": 247971,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2816]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918014918.256, "dur": 60.275, + "args": { + "External id": 247972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[11534336, 2816, 1], [2816, 1], []], "Input Dims": [[16, 4096, 2816], [1024, 2816], []], "Ev Idx": 6803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918014919.036, "dur": 5.329, + "args": { + "External id": 247973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2816, 1]], "Input Dims": [[1024, 2816]], "Ev Idx": 6804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918014920.494, "dur": 2.810, + "args": { + "External id": 247974,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2816, 1], [], []], "Input Dims": [[1024, 2816], [], []], "Ev Idx": 6805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918014922.505, "dur": 0.629, + "args": { + "External id": 247975,"Record function id": 0, "Concrete Inputs": ["", "[2816, 1024]", "[1, 2816]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2816, 1], [], [], []], "Input Dims": [[1024, 2816], [], [], []], "Ev Idx": 6806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918014925.339, "dur": 52.753, + "args": { + "External id": 247976,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[11534336, 2816, 1], [1, 2816]], "Input Dims": [[16, 4096, 2816], [2816, 1024]], "Ev Idx": 6807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 4183438, "tid": 4183438, + "ts": 667918014927.521, "dur": 3.298, + "args": { + "External id": 247977,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 6808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918014929.048, "dur": 1.663, + "args": { + "External id": 247978,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2816]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 2816, 1], []], "Input Dims": [[16, 4096, 2816], []], "Ev Idx": 6809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918014931.779, "dur": 42.445, + "args": { + "External id": 247979,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2816, 1], [1, 2816]], "Input Dims": [[65536, 2816], [2816, 1024]], "Ev Idx": 6810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 4183438, "tid": 4183438, + "ts": 667918014976.346, "dur": 1.109, + "args": { + "External id": 247980,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 4183438, + "ts": 667918015001.317, "dur": 24.909, + "args": { + "External id": 247981,"Sequence number": 2987712, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 1], [4194304, 1024, 1], []], "Input Dims": [[16, 4096, 1024], [16, 4096, 1024], []], "Ev Idx": 6812 + } + }, + { + "ph": "s", "id": 32, "pid": 4183438, "tid": 4183438, "ts": 667918015001.317, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 4183438, "tid": 4183438, + "ts": 667918015044.688, "dur": 41.558, + "args": { + "External id": 247982,"Sequence number": 2987713, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "-2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[4194304, 1024, 1], [4194304, 1024, 1], [4194304, 1024, 1], [4194304, 1024, 1]], []], "Input Dims": [[[16, 4096, 1024], [16, 4096, 1024], [16, 4096, 1024], [16, 4096, 1024]], []], "Ev Idx": 6813 + } + }, + { + "ph": "s", "id": 31, "pid": 4183438, "tid": 4183438, "ts": 667918015044.688, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 4183438, "tid": 4183438, + "ts": 667918015053.506, "dur": 27.952, + "args": { + "External id": 247983,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[4194304, 1024, 1], [4194304, 1024, 1], [4194304, 1024, 1], [4194304, 1024, 1]], []], "Input Dims": [[[16, 4096, 1024], [16, 4096, 1024], [16, 4096, 1024], [16, 4096, 1024]], []], "Ev Idx": 6814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918015083.256, "dur": 1.412, + "args": { + "External id": 247984,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 4096], []], "Ev Idx": 6815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 4183438, "tid": 4183438, + "ts": 667918015120.994, "dur": 65.791, + "args": { + "External id": 247985,"Record function id": 0, "Ev Idx": 6816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 4183438, "tid": 4183438, + "ts": 667918015188.091, "dur": 212.536, + "args": { + "External id": 247986,"Record function id": 0, "Ev Idx": 6817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918015243.182, "dur": 148.735, + "args": { + "External id": 247987,"Sequence number": 2987714, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [16777216, 4096, 1024, 1]], "Input Dims": [[1024], [16, 4096, 4, 1024]], "Ev Idx": 6818 + } + }, + { + "ph": "s", "id": 30, "pid": 4183438, "tid": 4183438, "ts": 667918015243.182, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 4183438, "tid": 4183438, + "ts": 667918015312.368, "dur": 39.865, + "args": { + "External id": 247988,"kernel_hash": "ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "1024", "1", "9.9999999999999995e-07", "True", "1024", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/cg/ccgi3dilqw7bx5zirazjzjmgxoha7dpoyovs4nylqgmksmgrgn6b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[262144, 1024], [262144, 1024], [1024], [262144], [], [], [], [], [], [], [], [], []], "Ev Idx": 6819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 4183438, + "ts": 667918015484.663, "dur": 45.210, + "args": { + "External id": 247989,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918015487.493, "dur": 5.169, + "args": { + "External id": 247990,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 4183438, + "ts": 667918015495.387, "dur": 34.204, + "args": { + "External id": 247991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 6822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918015498.982, "dur": 30.095, + "args": { + "External id": 247992,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 6823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 4183438, + "ts": 667918015534.714, "dur": 21.285, + "args": { + "External id": 247993,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918015535.959, "dur": 2.895, + "args": { + "External id": 247994,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 4183438, + "ts": 667918015539.727, "dur": 15.987, + "args": { + "External id": 247995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 6826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918015540.631, "dur": 14.630, + "args": { + "External id": 247996,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 6827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 4183438, + "ts": 667918015559.299, "dur": 18.842, + "args": { + "External id": 247997,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918015560.414, "dur": 2.618, + "args": { + "External id": 247998,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 4183438, + "ts": 667918015564.099, "dur": 13.775, + "args": { + "External id": 247999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 6830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918015565.120, "dur": 12.353, + "args": { + "External id": 248000,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 6831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918015587.081, "dur": 0.890, + "args": { + "External id": 248001,"Sequence number": 2987715, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], [], []], "Ev Idx": 6832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 4183438, "tid": 4183438, + "ts": 667918015596.219, "dur": 9.345, + "args": { + "External id": 248002,"Sequence number": 2987715, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "5", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 6833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015601.849, "dur": 2.060, + "args": { + "External id": 248003,"Record function id": 0, "Concrete Inputs": ["", "[16, 8188, 5]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 6834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918015612.035, "dur": 6.833, + "args": { + "External id": 248004,"Sequence number": 2987715, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 8188, 5], [], [], [], []], "Ev Idx": 6835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015615.879, "dur": 1.253, + "args": { + "External id": 248005,"Record function id": 0, "Concrete Inputs": ["", "[16, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 8188, 5], [], [], []], "Ev Idx": 6836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918015620.289, "dur": 3.489, + "args": { + "External id": 248006,"Sequence number": 2987715, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 8188, 5], [], [], [], []], "Ev Idx": 6837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015622.271, "dur": 0.757, + "args": { + "External id": 248007,"Record function id": 0, "Concrete Inputs": ["", "[16, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 8188, 5], [], [], []], "Ev Idx": 6838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918015625.577, "dur": 3.284, + "args": { + "External id": 248008,"Sequence number": 2987715, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 8188, 5], [], [], [], []], "Ev Idx": 6839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015627.412, "dur": 0.898, + "args": { + "External id": 248009,"Record function id": 0, "Concrete Inputs": ["", "[16, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 8188, 5], [], [], []], "Ev Idx": 6840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918015632.401, "dur": 3.271, + "args": { + "External id": 248010,"Sequence number": 2987715, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 8188, 4], [], [], [], []], "Ev Idx": 6841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015634.461, "dur": 0.659, + "args": { + "External id": 248011,"Record function id": 0, "Concrete Inputs": ["", "[16, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 8188, 4], [], [], []], "Ev Idx": 6842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918015636.774, "dur": 3.273, + "args": { + "External id": 248012,"Sequence number": 2987715, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 8188, 4], [], [], [], []], "Ev Idx": 6843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015638.636, "dur": 0.743, + "args": { + "External id": 248013,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 8188, 4], [], [], []], "Ev Idx": 6844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918015641.405, "dur": 3.549, + "args": { + "External id": 248014,"Sequence number": 2987715, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 4096, 4], [], [], [], []], "Ev Idx": 6845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015643.430, "dur": 1.038, + "args": { + "External id": 248015,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4096, 4], [], [], []], "Ev Idx": 6846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918015649.363, "dur": 43.760, + "args": { + "External id": 248016,"Sequence number": 2987715, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[16, 4096, 4], [], []], "Ev Idx": 6847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015653.380, "dur": 38.056, + "args": { + "External id": 248017,"Record function id": 0, "Concrete Inputs": ["", "[16, 4, 4096]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4096, 4], [], [], []], "Ev Idx": 6848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918015699.810, "dur": 4.208, + "args": { + "External id": 248018,"Sequence number": 2987715, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 4, 4096], [], [], [], []], "Ev Idx": 6849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015702.624, "dur": 0.748, + "args": { + "External id": 248019,"Record function id": 0, "Concrete Inputs": ["", "[16, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 6850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 4183438, "tid": 4183438, + "ts": 667918015707.253, "dur": 7.179, + "args": { + "External id": 248020,"Sequence number": 2987715, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[16, 4, 4096], [], []], "Ev Idx": 6851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015712.207, "dur": 1.144, + "args": { + "External id": 248021,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 6852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918015715.649, "dur": 3.627, + "args": { + "External id": 248022,"Sequence number": 2987715, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 6853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015717.818, "dur": 0.904, + "args": { + "External id": 248023,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 6854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918015722.651, "dur": 6.443, + "args": { + "External id": 248024,"Sequence number": 2987715, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], []], "Ev Idx": 6855 + } + }, + { + "ph": "s", "id": 29, "pid": 4183438, "tid": 4183438, "ts": 667918015722.651, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015726.351, "dur": 1.066, + "args": { + "External id": 248025,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "[16777216, 4096, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 6856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918015730.295, "dur": 4.753, + "args": { + "External id": 248026,"Sequence number": 2987716, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], []], "Ev Idx": 6857 + } + }, + { + "ph": "s", "id": 28, "pid": 4183438, "tid": 4183438, "ts": 667918015730.295, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015733.306, "dur": 0.968, + "args": { + "External id": 248027,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "[16777216, 4096, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 6858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 4183438, "tid": 4183438, + "ts": 667918015736.384, "dur": 5.719, + "args": { + "External id": 248028,"Sequence number": 2987717, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], []], "Input Dims": [[16, 4096, 4, 1024], [], []], "Ev Idx": 6859 + } + }, + { + "ph": "s", "id": 27, "pid": 4183438, "tid": 4183438, "ts": 667918015736.384, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015740.595, "dur": 0.718, + "args": { + "External id": 248029,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 6860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918015743.402, "dur": 4.804, + "args": { + "External id": 248030,"Sequence number": 2987718, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], [], []], "Ev Idx": 6861 + } + }, + { + "ph": "s", "id": 26, "pid": 4183438, "tid": 4183438, "ts": 667918015743.402, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015746.446, "dur": 0.982, + "args": { + "External id": 248031,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], []], "Ev Idx": 6862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 4183438, "tid": 4183438, + "ts": 667918015753.553, "dur": 45.674, + "args": { + "External id": 248032,"Sequence number": 2987719, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 6863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 4183438, + "ts": 667918015756.293, "dur": 42.713, + "args": { + "External id": 248033,"Sequence number": 2987719, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 6864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918015759.119, "dur": 8.663, + "args": { + "External id": 248034,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 6865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918015761.614, "dur": 5.514, + "args": { + "External id": 248035,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918015769.715, "dur": 28.759, + "args": { + "External id": 248036,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 6867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918015826.281, "dur": 4.153, + "args": { + "External id": 248037,"Sequence number": 2987719, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 6868 + } + }, + { + "ph": "s", "id": 25, "pid": 4183438, "tid": 4183438, "ts": 667918015826.281, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918015832.808, "dur": 1.200, + "args": { + "External id": 248038,"Sequence number": 2987720, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 6869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918015861.234, "dur": 21450.326, + "args": { + "External id": 248039,"Sequence number": 2987720, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536], [32000, 1024], [], [], [], [], []], "Ev Idx": 6870 + } + }, + { + "ph": "s", "id": 24, "pid": 4183438, "tid": 4183438, "ts": 667918015861.234, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 4183438, "tid": 4183438, + "ts": 667918015874.834, "dur": 30.516, + "args": { + "External id": 248040,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 4183438, + "ts": 667918015875.634, "dur": 29.510, + "args": { + "External id": 248041,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918015877.094, "dur": 6.137, + "args": { + "External id": 248042,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 6873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918015878.705, "dur": 4.111, + "args": { + "External id": 248043,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918015883.921, "dur": 20.691, + "args": { + "External id": 248044,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [4096, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 6875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 4183438, "tid": 4183438, + "ts": 667918015923.399, "dur": 37.922, + "args": { + "External id": 248045,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 6876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918015933.958, "dur": 7.443, + "args": { + "External id": 248046,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 6877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015936.721, "dur": 4.347, + "args": { + "External id": 248047,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 4183438, + "ts": 667918015942.359, "dur": 18.768, + "args": { + "External id": 248048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 6879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918015943.848, "dur": 16.814, + "args": { + "External id": 248049,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 6880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 4183438, "tid": 4183438, + "ts": 667918015964.717, "dur": 23.732, + "args": { + "External id": 248050,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[32000, 1024], [], [], [], [], []], "Ev Idx": 6881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918015965.873, "dur": 5.262, + "args": { + "External id": 248051,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[32000, 1024], [], [], [], [], []], "Ev Idx": 6882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918015967.583, "dur": 3.307, + "args": { + "External id": 248052,"Record function id": 0, "Concrete Inputs": ["[32000, 1024]", "[1024, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 4183438, + "ts": 667918015972.065, "dur": 16.182, + "args": { + "External id": 248053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 6884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918015972.557, "dur": 15.311, + "args": { + "External id": 248054,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1024, 1], []], "Input Dims": [[32000, 1024], []], "Ev Idx": 6885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 4183438, + "ts": 667918015992.971, "dur": 21.115, + "args": { + "External id": 248055,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918015994.544, "dur": 3.117, + "args": { + "External id": 248056,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 4183438, + "ts": 667918015998.619, "dur": 15.214, + "args": { + "External id": 248057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 6888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918015999.609, "dur": 13.692, + "args": { + "External id": 248058,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 6889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 4183438, "tid": 4183438, + "ts": 667918016021.211, "dur": 28.542, + "args": { + "External id": 248059,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 6890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918016053.937, "dur": 62.746, + "args": { + "External id": 248060,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 6891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918016057.535, "dur": 58.612, + "args": { + "External id": 248061,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 6892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918016063.899, "dur": 0.975, + "args": { + "External id": 248062,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 6893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918016066.497, "dur": 28.846, + "args": { + "External id": 248063,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 6894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667918016071.393, "dur": 23.762, + "args": { + "External id": 248064,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[65536], [], [], [], [], [], []], "Ev Idx": 6895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918016073.946, "dur": 3.305, + "args": { + "External id": 248065,"Record function id": 0, "Concrete Inputs": ["[65536]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918016078.483, "dur": 16.259, + "args": { + "External id": 248066,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[65536], [65536], []], "Ev Idx": 6897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 4183438, "tid": 4183438, + "ts": 667918016121.082, "dur": 15179.391, + "args": { + "External id": 248067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 4183438, "tid": 4183438, + "ts": 667918016123.576, "dur": 15175.369, + "args": { + "External id": 248068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918031313.105, "dur": 7.914, + "args": { + "External id": 248069,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 6900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918031317.928, "dur": 1.142, + "args": { + "External id": 248070,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 6901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918031326.856, "dur": 115.749, + "args": { + "External id": 248071,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 6902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918031328.647, "dur": 7.021, + "args": { + "External id": 248072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 6903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918031331.216, "dur": 3.575, + "args": { + "External id": 248073,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 6904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918031333.445, "dur": 1.131, + "args": { + "External id": 248074,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 6905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918031337.407, "dur": 104.364, + "args": { + "External id": 248075,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 6906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918031339.682, "dur": 101.346, + "args": { + "External id": 248076,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 6907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918031446.672, "dur": 4.484, + "args": { + "External id": 248077,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 6908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918031449.048, "dur": 0.773, + "args": { + "External id": 248078,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 6909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918031461.359, "dur": 4.289, + "args": { + "External id": 248079,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 6910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918031476.434, "dur": 7.726, + "args": { + "External id": 248080,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 6911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918031479.570, "dur": 4.301, + "args": { + "External id": 248081,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918031636.260, "dur": 286.047, + "args": { + "External id": 248082,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 6913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918031639.963, "dur": 4.879, + "args": { + "External id": 248083,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918031648.269, "dur": 273.097, + "args": { + "External id": 248084,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 6915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918031650.889, "dur": 39.358, + "args": { + "External id": 248085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 6916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918031694.351, "dur": 34.947, + "args": { + "External id": 248086,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 6917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918031731.825, "dur": 6.988, + "args": { + "External id": 248087,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 6918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918031737.035, "dur": 1.374, + "args": { + "External id": 248088,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 6919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918031740.401, "dur": 30.651, + "args": { + "External id": 248089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 6920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918031742.250, "dur": 1.782, + "args": { + "External id": 248090,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918031747.014, "dur": 23.761, + "args": { + "External id": 248091,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 6922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918031751.930, "dur": 3.514, + "args": { + "External id": 248092,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 6923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918031773.612, "dur": 24.250, + "args": { + "External id": 248093,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 6924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918031800.887, "dur": 17.831, + "args": { + "External id": 248094,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 6925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918031822.627, "dur": 18.912, + "args": { + "External id": 248095,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 6926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918031844.576, "dur": 14.689, + "args": { + "External id": 248096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 6927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918031862.387, "dur": 26.007, + "args": { + "External id": 248097,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 6928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918031866.864, "dur": 1.741, + "args": { + "External id": 248098,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 6929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918031871.515, "dur": 0.630, + "args": { + "External id": 248099,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 6930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918031891.494, "dur": 15.394, + "args": { + "External id": 248100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 6931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918031908.870, "dur": 11.431, + "args": { + "External id": 248101,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 6932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918031930.976, "dur": 2.757, + "args": { + "External id": 248102,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 6933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918031941.644, "dur": 5.119, + "args": { + "External id": 248103,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 6934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918031944.633, "dur": 1.095, + "args": { + "External id": 248104,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 6935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918032035.917, "dur": 66.568, + "args": { + "External id": 248105,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 6936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918032107.955, "dur": 5.331, + "args": { + "External id": 248106,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 6937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918032111.194, "dur": 0.758, + "args": { + "External id": 248107,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 6938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918032115.189, "dur": 28.824, + "args": { + "External id": 248108,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 6939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918032150.053, "dur": 7.716, + "args": { + "External id": 248109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 6940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918032152.271, "dur": 4.620, + "args": { + "External id": 248110,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 6941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918032155.121, "dur": 1.545, + "args": { + "External id": 248111,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 6942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918032162.656, "dur": 73.281, + "args": { + "External id": 248112,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 6943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918032164.354, "dur": 70.561, + "args": { + "External id": 248113,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 6944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918032243.132, "dur": 19.193, + "args": { + "External id": 248114,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 6945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918032269.262, "dur": 5.274, + "args": { + "External id": 248115,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 6946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918032272.122, "dur": 1.006, + "args": { + "External id": 248116,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "8388608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 6947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918032279.432, "dur": 63.443, + "args": { + "External id": 248117,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 6948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918032280.758, "dur": 11.467, + "args": { + "External id": 248118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 6949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918032282.094, "dur": 9.474, + "args": { + "External id": 248119,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 6950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918032290.446, "dur": 0.968, + "args": { + "External id": 248120,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 6951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918032293.583, "dur": 48.887, + "args": { + "External id": 248121,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 6952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918032294.646, "dur": 47.219, + "args": { + "External id": 248122,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 6953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918032347.937, "dur": 4.594, + "args": { + "External id": 248123,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 6954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918032350.136, "dur": 1.069, + "args": { + "External id": 248124,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 6955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918032359.271, "dur": 2.226, + "args": { + "External id": 248125,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 6956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918032370.852, "dur": 9.673, + "args": { + "External id": 248126,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 6957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918032373.888, "dur": 6.294, + "args": { + "External id": 248127,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918032488.653, "dur": 291.546, + "args": { + "External id": 248128,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 6959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918032490.967, "dur": 2.334, + "args": { + "External id": 248129,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918032496.034, "dur": 283.554, + "args": { + "External id": 248130,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 6961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918032497.894, "dur": 0.368, + "args": { + "External id": 248131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 6962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918032499.903, "dur": 27.205, + "args": { + "External id": 248132,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 6963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918032528.958, "dur": 3.713, + "args": { + "External id": 248133,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 6964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918032531.451, "dur": 0.827, + "args": { + "External id": 248134,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 6965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918032534.083, "dur": 38.297, + "args": { + "External id": 248135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 6966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918032535.364, "dur": 1.773, + "args": { + "External id": 248136,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918032538.764, "dur": 33.293, + "args": { + "External id": 248137,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 6968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918032544.138, "dur": 2.800, + "args": { + "External id": 248138,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 6969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918032574.131, "dur": 38.267, + "args": { + "External id": 248139,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 6970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918032614.554, "dur": 28.703, + "args": { + "External id": 248140,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 6971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918032645.855, "dur": 57.027, + "args": { + "External id": 248141,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 6972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918032706.114, "dur": 15.098, + "args": { + "External id": 248142,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 6973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918032723.449, "dur": 24.447, + "args": { + "External id": 248143,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 6974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918032725.971, "dur": 2.381, + "args": { + "External id": 248144,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 6975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918032730.991, "dur": 0.977, + "args": { + "External id": 248145,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 6976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918032750.536, "dur": 13.853, + "args": { + "External id": 248146,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 6977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918032765.903, "dur": 12.669, + "args": { + "External id": 248147,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 6978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918032788.810, "dur": 2.763, + "args": { + "External id": 248148,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 6979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918032801.171, "dur": 4.236, + "args": { + "External id": 248149,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 6980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918032803.830, "dur": 0.549, + "args": { + "External id": 248150,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 6981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918032882.275, "dur": 54.662, + "args": { + "External id": 248151,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 6982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918032942.114, "dur": 5.386, + "args": { + "External id": 248152,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 6983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918032945.146, "dur": 1.051, + "args": { + "External id": 248153,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "8388608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 6984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918032949.094, "dur": 26.290, + "args": { + "External id": 248154,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 6985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918032980.428, "dur": 11.596, + "args": { + "External id": 248155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 6986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918032987.297, "dur": 3.977, + "args": { + "External id": 248156,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 6987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918032989.950, "dur": 1.124, + "args": { + "External id": 248157,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 6988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918032995.294, "dur": 44.010, + "args": { + "External id": 248158,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 6989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918032996.834, "dur": 41.973, + "args": { + "External id": 248159,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 6990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918033043.511, "dur": 15.877, + "args": { + "External id": 248160,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 6991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918033065.099, "dur": 3.686, + "args": { + "External id": 248161,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 6992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918033067.332, "dur": 0.537, + "args": { + "External id": 248162,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 6993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918033073.099, "dur": 55.112, + "args": { + "External id": 248163,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 6994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918033074.079, "dur": 6.848, + "args": { + "External id": 248164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 6995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918033075.160, "dur": 5.169, + "args": { + "External id": 248165,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 6996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918033077.099, "dur": 2.999, + "args": { + "External id": 248166,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 6997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918033082.050, "dur": 45.733, + "args": { + "External id": 248167,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 6998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918033083.180, "dur": 44.051, + "args": { + "External id": 248168,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 6999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918033132.758, "dur": 4.210, + "args": { + "External id": 248169,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918033135.118, "dur": 0.735, + "args": { + "External id": 248170,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918033143.153, "dur": 1.861, + "args": { + "External id": 248171,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918033153.878, "dur": 6.754, + "args": { + "External id": 248172,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918033156.409, "dur": 3.835, + "args": { + "External id": 248173,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918033270.619, "dur": 200.836, + "args": { + "External id": 248174,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918033273.507, "dur": 3.101, + "args": { + "External id": 248175,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918033278.383, "dur": 192.459, + "args": { + "External id": 248176,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918033280.055, "dur": 0.587, + "args": { + "External id": 248177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918033284.211, "dur": 24.269, + "args": { + "External id": 248178,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918033310.373, "dur": 3.921, + "args": { + "External id": 248179,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918033312.771, "dur": 1.142, + "args": { + "External id": 248180,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918033315.626, "dur": 26.738, + "args": { + "External id": 248181,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918033317.343, "dur": 3.531, + "args": { + "External id": 248182,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918033322.245, "dur": 19.840, + "args": { + "External id": 248183,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918033325.181, "dur": 2.926, + "args": { + "External id": 248184,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918033343.670, "dur": 24.212, + "args": { + "External id": 248185,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918033369.654, "dur": 15.645, + "args": { + "External id": 248186,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918033387.717, "dur": 15.459, + "args": { + "External id": 248187,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918033404.809, "dur": 13.567, + "args": { + "External id": 248188,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918033420.090, "dur": 22.334, + "args": { + "External id": 248189,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918033422.551, "dur": 1.901, + "args": { + "External id": 248190,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918033427.030, "dur": 0.948, + "args": { + "External id": 248191,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918033444.374, "dur": 12.645, + "args": { + "External id": 248192,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918033458.427, "dur": 11.139, + "args": { + "External id": 248193,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918033478.234, "dur": 2.819, + "args": { + "External id": 248194,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918033490.728, "dur": 3.810, + "args": { + "External id": 248195,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918033493.130, "dur": 0.507, + "args": { + "External id": 248196,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918033561.323, "dur": 47.029, + "args": { + "External id": 248197,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918033612.919, "dur": 5.289, + "args": { + "External id": 248198,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918033615.984, "dur": 1.185, + "args": { + "External id": 248199,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918033619.676, "dur": 25.287, + "args": { + "External id": 248200,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918033649.781, "dur": 46.430, + "args": { + "External id": 248201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918033688.479, "dur": 6.600, + "args": { + "External id": 248202,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918033691.501, "dur": 3.119, + "args": { + "External id": 248203,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918033699.900, "dur": 51.534, + "args": { + "External id": 248204,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918033701.133, "dur": 49.282, + "args": { + "External id": 248205,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918033756.030, "dur": 17.341, + "args": { + "External id": 248206,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918033779.339, "dur": 4.877, + "args": { + "External id": 248207,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918033782.355, "dur": 0.861, + "args": { + "External id": 248208,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "25165824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918033788.361, "dur": 51.304, + "args": { + "External id": 248209,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918033789.394, "dur": 4.784, + "args": { + "External id": 248210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918033790.692, "dur": 2.936, + "args": { + "External id": 248211,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918033792.636, "dur": 0.853, + "args": { + "External id": 248212,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918033795.147, "dur": 44.100, + "args": { + "External id": 248213,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918033796.388, "dur": 42.222, + "args": { + "External id": 248214,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918033843.843, "dur": 4.171, + "args": { + "External id": 248215,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918033845.831, "dur": 1.051, + "args": { + "External id": 248216,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918033854.224, "dur": 1.995, + "args": { + "External id": 248217,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918033864.983, "dur": 7.528, + "args": { + "External id": 248218,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918033867.607, "dur": 4.635, + "args": { + "External id": 248219,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918033963.180, "dur": 210.501, + "args": { + "External id": 248220,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918033965.700, "dur": 2.658, + "args": { + "External id": 248221,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918033972.547, "dur": 200.538, + "args": { + "External id": 248222,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918033973.928, "dur": 0.431, + "args": { + "External id": 248223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918033976.068, "dur": 22.723, + "args": { + "External id": 248224,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918034000.400, "dur": 5.500, + "args": { + "External id": 248225,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918034002.733, "dur": 2.853, + "args": { + "External id": 248226,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918034007.353, "dur": 22.858, + "args": { + "External id": 248227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918034008.631, "dur": 1.761, + "args": { + "External id": 248228,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918034011.480, "dur": 18.428, + "args": { + "External id": 248229,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918034014.191, "dur": 2.444, + "args": { + "External id": 248230,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918034031.860, "dur": 21.775, + "args": { + "External id": 248231,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918034055.305, "dur": 13.494, + "args": { + "External id": 248232,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918034071.864, "dur": 16.032, + "args": { + "External id": 248233,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918034089.357, "dur": 29.026, + "args": { + "External id": 248234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918034120.508, "dur": 23.309, + "args": { + "External id": 248235,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918034122.266, "dur": 1.809, + "args": { + "External id": 248236,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918034126.433, "dur": 2.854, + "args": { + "External id": 248237,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918034145.621, "dur": 12.982, + "args": { + "External id": 248238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918034159.720, "dur": 12.075, + "args": { + "External id": 248239,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918034180.729, "dur": 1.982, + "args": { + "External id": 248240,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918034191.127, "dur": 20.583, + "args": { + "External id": 248241,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918034209.508, "dur": 0.666, + "args": { + "External id": 248242,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918034286.074, "dur": 52.321, + "args": { + "External id": 248243,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918034343.347, "dur": 6.021, + "args": { + "External id": 248244,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918034346.630, "dur": 1.632, + "args": { + "External id": 248245,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "25165824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918034350.956, "dur": 26.122, + "args": { + "External id": 248246,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918034382.011, "dur": 6.787, + "args": { + "External id": 248247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918034383.734, "dur": 4.291, + "args": { + "External id": 248248,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918034386.394, "dur": 1.474, + "args": { + "External id": 248249,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918034391.647, "dur": 44.224, + "args": { + "External id": 248250,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918034393.280, "dur": 41.938, + "args": { + "External id": 248251,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918034439.818, "dur": 16.855, + "args": { + "External id": 248252,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918034462.136, "dur": 4.233, + "args": { + "External id": 248253,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918034464.569, "dur": 0.887, + "args": { + "External id": 248254,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918034470.544, "dur": 51.295, + "args": { + "External id": 248255,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918034471.859, "dur": 4.825, + "args": { + "External id": 248256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918034473.182, "dur": 2.918, + "args": { + "External id": 248257,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918034475.244, "dur": 0.724, + "args": { + "External id": 248258,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918034477.808, "dur": 43.612, + "args": { + "External id": 248259,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918034478.702, "dur": 41.986, + "args": { + "External id": 248260,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918034525.971, "dur": 4.180, + "args": { + "External id": 248261,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918034528.082, "dur": 0.954, + "args": { + "External id": 248262,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918034536.239, "dur": 1.836, + "args": { + "External id": 248263,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918034545.973, "dur": 6.315, + "args": { + "External id": 248264,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918034548.337, "dur": 3.640, + "args": { + "External id": 248265,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918034633.671, "dur": 243.212, + "args": { + "External id": 248266,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918034638.260, "dur": 2.380, + "args": { + "External id": 248267,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918034642.424, "dur": 233.781, + "args": { + "External id": 248268,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918034643.914, "dur": 0.482, + "args": { + "External id": 248269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918034645.848, "dur": 63.052, + "args": { + "External id": 248270,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918034712.182, "dur": 6.397, + "args": { + "External id": 248271,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918034717.041, "dur": 1.241, + "args": { + "External id": 248272,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918034719.579, "dur": 24.297, + "args": { + "External id": 248273,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918034721.299, "dur": 1.753, + "args": { + "External id": 248274,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918034724.668, "dur": 18.869, + "args": { + "External id": 248275,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918034727.862, "dur": 2.538, + "args": { + "External id": 248276,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918034745.680, "dur": 22.056, + "args": { + "External id": 248277,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918034769.234, "dur": 15.754, + "args": { + "External id": 248278,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918034788.008, "dur": 15.772, + "args": { + "External id": 248279,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918034805.449, "dur": 14.313, + "args": { + "External id": 248280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918034821.909, "dur": 24.074, + "args": { + "External id": 248281,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918034826.043, "dur": 1.962, + "args": { + "External id": 248282,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918034830.544, "dur": 1.011, + "args": { + "External id": 248283,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918034848.572, "dur": 13.048, + "args": { + "External id": 248284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918034862.828, "dur": 11.967, + "args": { + "External id": 248285,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918034884.421, "dur": 2.713, + "args": { + "External id": 248286,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918034908.853, "dur": 4.143, + "args": { + "External id": 248287,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918034911.702, "dur": 0.462, + "args": { + "External id": 248288,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918034987.976, "dur": 53.435, + "args": { + "External id": 248289,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918035045.744, "dur": 5.064, + "args": { + "External id": 248290,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918035048.866, "dur": 0.882, + "args": { + "External id": 248291,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918035052.551, "dur": 25.784, + "args": { + "External id": 248292,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918035083.788, "dur": 9.618, + "args": { + "External id": 248293,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918035085.628, "dur": 6.830, + "args": { + "External id": 248294,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918035088.368, "dur": 3.862, + "args": { + "External id": 248295,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918035096.112, "dur": 44.648, + "args": { + "External id": 248296,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918035097.752, "dur": 42.363, + "args": { + "External id": 248297,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918035144.956, "dur": 13.955, + "args": { + "External id": 248298,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918035164.260, "dur": 4.310, + "args": { + "External id": 248299,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918035166.819, "dur": 0.864, + "args": { + "External id": 248300,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "41943040"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918035173.149, "dur": 101.697, + "args": { + "External id": 248301,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918035174.371, "dur": 5.027, + "args": { + "External id": 248302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918035175.629, "dur": 3.150, + "args": { + "External id": 248303,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918035177.765, "dur": 0.869, + "args": { + "External id": 248304,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918035180.225, "dur": 94.097, + "args": { + "External id": 248305,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918035180.975, "dur": 92.246, + "args": { + "External id": 248306,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918035280.924, "dur": 4.997, + "args": { + "External id": 248307,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918035283.550, "dur": 1.085, + "args": { + "External id": 248308,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918035292.445, "dur": 2.055, + "args": { + "External id": 248309,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918035303.481, "dur": 7.836, + "args": { + "External id": 248310,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918035305.752, "dur": 5.296, + "args": { + "External id": 248311,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918035401.144, "dur": 215.129, + "args": { + "External id": 248312,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918035403.789, "dur": 2.647, + "args": { + "External id": 248313,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918035411.172, "dur": 204.517, + "args": { + "External id": 248314,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918035412.966, "dur": 0.380, + "args": { + "External id": 248315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918035414.767, "dur": 21.566, + "args": { + "External id": 248316,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918035437.949, "dur": 6.624, + "args": { + "External id": 248317,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918035440.836, "dur": 3.329, + "args": { + "External id": 248318,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918035445.694, "dur": 53.377, + "args": { + "External id": 248319,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918035446.972, "dur": 1.613, + "args": { + "External id": 248320,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918035479.288, "dur": 19.426, + "args": { + "External id": 248321,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918035482.811, "dur": 2.462, + "args": { + "External id": 248322,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918035500.835, "dur": 20.234, + "args": { + "External id": 248323,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918035522.618, "dur": 14.259, + "args": { + "External id": 248324,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918035540.084, "dur": 14.891, + "args": { + "External id": 248325,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918035556.617, "dur": 11.609, + "args": { + "External id": 248326,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918035570.172, "dur": 19.024, + "args": { + "External id": 248327,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918035571.943, "dur": 1.706, + "args": { + "External id": 248328,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918035576.009, "dur": 0.728, + "args": { + "External id": 248329,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918035591.069, "dur": 11.038, + "args": { + "External id": 248330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918035603.467, "dur": 11.095, + "args": { + "External id": 248331,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918035622.947, "dur": 1.967, + "args": { + "External id": 248332,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918035633.082, "dur": 3.575, + "args": { + "External id": 248333,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918035635.481, "dur": 0.274, + "args": { + "External id": 248334,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918035743.651, "dur": 52.324, + "args": { + "External id": 248335,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918035801.079, "dur": 5.928, + "args": { + "External id": 248336,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918035804.550, "dur": 1.007, + "args": { + "External id": 248337,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "41943040"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918035808.453, "dur": 24.802, + "args": { + "External id": 248338,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918035838.445, "dur": 6.834, + "args": { + "External id": 248339,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918035840.153, "dur": 4.116, + "args": { + "External id": 248340,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918035842.659, "dur": 1.362, + "args": { + "External id": 248341,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918035848.367, "dur": 43.856, + "args": { + "External id": 248342,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918035849.488, "dur": 42.239, + "args": { + "External id": 248343,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918035896.580, "dur": 15.465, + "args": { + "External id": 248344,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918035917.785, "dur": 4.216, + "args": { + "External id": 248345,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918035920.138, "dur": 0.883, + "args": { + "External id": 248346,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918035926.532, "dur": 49.079, + "args": { + "External id": 248347,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918035927.865, "dur": 3.930, + "args": { + "External id": 248348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918035928.450, "dur": 2.689, + "args": { + "External id": 248349,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918035930.257, "dur": 0.728, + "args": { + "External id": 248350,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918035932.394, "dur": 42.792, + "args": { + "External id": 248351,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918035933.502, "dur": 41.121, + "args": { + "External id": 248352,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918035979.375, "dur": 3.923, + "args": { + "External id": 248353,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918035981.362, "dur": 0.819, + "args": { + "External id": 248354,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918035989.871, "dur": 1.685, + "args": { + "External id": 248355,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918035999.190, "dur": 8.637, + "args": { + "External id": 248356,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918036001.800, "dur": 5.709, + "args": { + "External id": 248357,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918036088.741, "dur": 212.946, + "args": { + "External id": 248358,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918036090.919, "dur": 2.349, + "args": { + "External id": 248359,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918036094.927, "dur": 206.253, + "args": { + "External id": 248360,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918036096.519, "dur": 0.438, + "args": { + "External id": 248361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918036098.194, "dur": 29.649, + "args": { + "External id": 248362,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918036129.674, "dur": 3.271, + "args": { + "External id": 248363,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918036131.608, "dur": 1.064, + "args": { + "External id": 248364,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918036134.500, "dur": 24.016, + "args": { + "External id": 248365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918036135.855, "dur": 1.601, + "args": { + "External id": 248366,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918036138.497, "dur": 19.724, + "args": { + "External id": 248367,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918036143.011, "dur": 2.207, + "args": { + "External id": 248368,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918036160.056, "dur": 18.909, + "args": { + "External id": 248369,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918036180.275, "dur": 29.583, + "args": { + "External id": 248370,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918036219.640, "dur": 15.411, + "args": { + "External id": 248371,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918036236.396, "dur": 12.446, + "args": { + "External id": 248372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918036250.894, "dur": 20.675, + "args": { + "External id": 248373,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918036252.934, "dur": 2.489, + "args": { + "External id": 248374,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918036257.959, "dur": 0.733, + "args": { + "External id": 248375,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918036273.296, "dur": 13.765, + "args": { + "External id": 248376,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918036288.243, "dur": 11.439, + "args": { + "External id": 248377,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918036308.356, "dur": 2.508, + "args": { + "External id": 248378,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918036319.798, "dur": 3.552, + "args": { + "External id": 248379,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918036322.075, "dur": 0.480, + "args": { + "External id": 248380,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918036389.138, "dur": 46.520, + "args": { + "External id": 248381,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918036439.765, "dur": 6.653, + "args": { + "External id": 248382,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918036442.861, "dur": 2.496, + "args": { + "External id": 248383,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918036447.791, "dur": 23.305, + "args": { + "External id": 248384,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918036475.518, "dur": 6.037, + "args": { + "External id": 248385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918036476.903, "dur": 3.925, + "args": { + "External id": 248386,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918036479.440, "dur": 1.218, + "args": { + "External id": 248387,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918036484.122, "dur": 42.864, + "args": { + "External id": 248388,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918036485.364, "dur": 41.111, + "args": { + "External id": 248389,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918036530.846, "dur": 13.216, + "args": { + "External id": 248390,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918036549.497, "dur": 3.934, + "args": { + "External id": 248391,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918036551.630, "dur": 0.881, + "args": { + "External id": 248392,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "58720256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918036557.461, "dur": 49.064, + "args": { + "External id": 248393,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918036558.323, "dur": 4.566, + "args": { + "External id": 248394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918036559.370, "dur": 2.962, + "args": { + "External id": 248395,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918036561.394, "dur": 0.796, + "args": { + "External id": 248396,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918036564.078, "dur": 42.077, + "args": { + "External id": 248397,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918036564.553, "dur": 41.094, + "args": { + "External id": 248398,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918036610.457, "dur": 3.902, + "args": { + "External id": 248399,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918036612.605, "dur": 0.749, + "args": { + "External id": 248400,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918036619.673, "dur": 1.526, + "args": { + "External id": 248401,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918036629.105, "dur": 7.070, + "args": { + "External id": 248402,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918036631.392, "dur": 4.484, + "args": { + "External id": 248403,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918036755.475, "dur": 188.812, + "args": { + "External id": 248404,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918036757.817, "dur": 5.090, + "args": { + "External id": 248405,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918036764.727, "dur": 179.104, + "args": { + "External id": 248406,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918036767.007, "dur": 0.340, + "args": { + "External id": 248407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918036768.824, "dur": 21.211, + "args": { + "External id": 248408,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918036791.787, "dur": 6.354, + "args": { + "External id": 248409,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918036796.946, "dur": 0.948, + "args": { + "External id": 248410,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918036799.311, "dur": 21.786, + "args": { + "External id": 248411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918036800.447, "dur": 1.753, + "args": { + "External id": 248412,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918036803.559, "dur": 17.245, + "args": { + "External id": 248413,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918036806.372, "dur": 2.701, + "args": { + "External id": 248414,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918036822.645, "dur": 20.526, + "args": { + "External id": 248415,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918036846.132, "dur": 16.098, + "args": { + "External id": 248416,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918036866.441, "dur": 14.029, + "args": { + "External id": 248417,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918036882.124, "dur": 11.802, + "args": { + "External id": 248418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918036895.730, "dur": 21.347, + "args": { + "External id": 248419,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918036899.854, "dur": 1.980, + "args": { + "External id": 248420,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918036904.228, "dur": 0.652, + "args": { + "External id": 248421,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918036918.701, "dur": 11.342, + "args": { + "External id": 248422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918036931.316, "dur": 11.435, + "args": { + "External id": 248423,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918036950.565, "dur": 2.502, + "args": { + "External id": 248424,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918036962.035, "dur": 3.832, + "args": { + "External id": 248425,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918036964.442, "dur": 0.374, + "args": { + "External id": 248426,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918037026.613, "dur": 45.210, + "args": { + "External id": 248427,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918037076.463, "dur": 5.219, + "args": { + "External id": 248428,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918037079.726, "dur": 0.874, + "args": { + "External id": 248429,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "58720256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918037083.132, "dur": 22.788, + "args": { + "External id": 248430,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918037110.473, "dur": 5.993, + "args": { + "External id": 248431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918037112.154, "dur": 3.740, + "args": { + "External id": 248432,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918037114.130, "dur": 1.548, + "args": { + "External id": 248433,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918037119.308, "dur": 40.979, + "args": { + "External id": 248434,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918037120.477, "dur": 39.021, + "args": { + "External id": 248435,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918037163.725, "dur": 14.428, + "args": { + "External id": 248436,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918037182.879, "dur": 42.048, + "args": { + "External id": 248437,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918037185.557, "dur": 38.889, + "args": { + "External id": 248438,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918037191.078, "dur": 15.134, + "args": { + "External id": 248439,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 7270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918037231.413, "dur": 32.390, + "args": { + "External id": 248440,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[32000, 1024], [], [], [], [], []], "Ev Idx": 7271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667918037233.436, "dur": 30.119, + "args": { + "External id": 248441,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], [], []], "Input Dims": [[32000, 1024], [], [], [], [], [], []], "Ev Idx": 7272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918037239.001, "dur": 4.517, + "args": { + "External id": 248442,"Record function id": 0, "Concrete Inputs": ["[32000, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918037245.064, "dur": 17.958, + "args": { + "External id": 248443,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 4183438, + "ts": 667918037280.403, "dur": 6.085, + "args": { + "External id": 248444,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 7275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 4183438, + "ts": 667918037282.832, "dur": 3.354, + "args": { + "External id": 248445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 7276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 4183438, + "ts": 667918037287.525, "dur": 1.699, + "args": { + "External id": 248446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 4183438, + "ts": 667918037288.702, "dur": 0.451, + "args": { + "External id": 248447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918037335.524, "dur": 27.163, + "args": { + "External id": 248448,"Sequence number": 2987721, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 7279 + } + }, + { + "ph": "s", "id": 23, "pid": 4183438, "tid": 4183438, "ts": 667918037335.524, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918037369.554, "dur": 6.325, + "args": { + "External id": 248449,"Sequence number": 2987722, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 4, 4096], [], [], [], []], "Ev Idx": 7280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918037373.179, "dur": 1.240, + "args": { + "External id": 248450,"Record function id": 0, "Concrete Inputs": ["", "[16, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 7281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 4183438, "tid": 4183438, + "ts": 667918037378.725, "dur": 6.940, + "args": { + "External id": 248451,"Sequence number": 2987722, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[16, 4, 4096], [], []], "Ev Idx": 7282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918037383.352, "dur": 1.238, + "args": { + "External id": 248452,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 7283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918037387.085, "dur": 3.362, + "args": { + "External id": 248453,"Sequence number": 2987722, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 7284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918037389.147, "dur": 0.738, + "args": { + "External id": 248454,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 7285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918037394.902, "dur": 5.773, + "args": { + "External id": 248455,"Sequence number": 2987722, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], []], "Ev Idx": 7286 + } + }, + { + "ph": "s", "id": 22, "pid": 4183438, "tid": 4183438, "ts": 667918037394.902, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918037398.321, "dur": 1.158, + "args": { + "External id": 248456,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "[16777216, 4096, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 7287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918037401.773, "dur": 4.806, + "args": { + "External id": 248457,"Sequence number": 2987723, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], []], "Ev Idx": 7288 + } + }, + { + "ph": "s", "id": 21, "pid": 4183438, "tid": 4183438, "ts": 667918037401.773, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918037405.074, "dur": 0.780, + "args": { + "External id": 248458,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "[16777216, 4096, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 7289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 4183438, "tid": 4183438, + "ts": 667918037407.643, "dur": 6.027, + "args": { + "External id": 248459,"Sequence number": 2987724, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], []], "Input Dims": [[16, 4096, 4, 1024], [], []], "Ev Idx": 7290 + } + }, + { + "ph": "s", "id": 20, "pid": 4183438, "tid": 4183438, "ts": 667918037407.643, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918037411.870, "dur": 0.963, + "args": { + "External id": 248460,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[16777216, 4096, 1]", "1024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 7291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918037414.794, "dur": 7.065, + "args": { + "External id": 248461,"Sequence number": 2987725, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], [], []], "Ev Idx": 7292 + } + }, + { + "ph": "s", "id": 19, "pid": 4183438, "tid": 4183438, "ts": 667918037414.794, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918037418.383, "dur": 2.740, + "args": { + "External id": 248462,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[16777216, 4096, 1]", "1024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], []], "Ev Idx": 7293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 4183438, "tid": 4183438, + "ts": 667918037425.839, "dur": 29.895, + "args": { + "External id": 248463,"Sequence number": 2987726, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 4183438, + "ts": 667918037427.541, "dur": 27.990, + "args": { + "External id": 248464,"Sequence number": 2987726, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918037430.521, "dur": 6.837, + "args": { + "External id": 248465,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 7296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918037432.867, "dur": 3.698, + "args": { + "External id": 248466,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918037438.523, "dur": 16.321, + "args": { + "External id": 248467,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 7298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918037481.802, "dur": 4.250, + "args": { + "External id": 248468,"Sequence number": 2987726, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 7299 + } + }, + { + "ph": "s", "id": 18, "pid": 4183438, "tid": 4183438, "ts": 667918037481.802, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918037488.644, "dur": 1.444, + "args": { + "External id": 248469,"Sequence number": 2987727, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918037522.681, "dur": 22287.011, + "args": { + "External id": 248470,"Sequence number": 2987727, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536], [32000, 1024], [], [], [], [], []], "Ev Idx": 7301 + } + }, + { + "ph": "s", "id": 17, "pid": 4183438, "tid": 4183438, "ts": 667918037522.681, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 4183438, "tid": 4183438, + "ts": 667918037536.991, "dur": 27.119, + "args": { + "External id": 248471,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 7302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 4183438, + "ts": 667918037537.531, "dur": 26.379, + "args": { + "External id": 248472,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 7303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918037538.649, "dur": 7.762, + "args": { + "External id": 248473,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 7304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918037540.165, "dur": 5.844, + "args": { + "External id": 248474,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918037547.304, "dur": 15.856, + "args": { + "External id": 248475,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [4096, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 7306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 4183438, "tid": 4183438, + "ts": 667918037579.637, "dur": 25.130, + "args": { + "External id": 248476,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 7307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918037580.740, "dur": 6.579, + "args": { + "External id": 248477,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 7308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918037583.652, "dur": 3.359, + "args": { + "External id": 248478,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 4183438, + "ts": 667918037588.790, "dur": 15.759, + "args": { + "External id": 248479,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 7310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918037590.574, "dur": 13.620, + "args": { + "External id": 248480,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 7311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 4183438, "tid": 4183438, + "ts": 667918037608.235, "dur": 20.806, + "args": { + "External id": 248481,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[32000, 1024], [], [], [], [], []], "Ev Idx": 7312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918037609.120, "dur": 6.012, + "args": { + "External id": 248482,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[32000, 1024], [], [], [], [], []], "Ev Idx": 7313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918037611.030, "dur": 3.854, + "args": { + "External id": 248483,"Record function id": 0, "Concrete Inputs": ["[32000, 1024]", "[1024, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 4183438, + "ts": 667918037615.687, "dur": 13.183, + "args": { + "External id": 248484,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918037616.568, "dur": 11.936, + "args": { + "External id": 248485,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1024, 1], []], "Input Dims": [[32000, 1024], []], "Ev Idx": 7316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 4183438, + "ts": 667918037635.305, "dur": 16.169, + "args": { + "External id": 248486,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 7317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918037637.031, "dur": 2.857, + "args": { + "External id": 248487,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 4183438, + "ts": 667918037640.689, "dur": 10.532, + "args": { + "External id": 248488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 7319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918037641.279, "dur": 9.625, + "args": { + "External id": 248489,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 4183438, "tid": 4183438, + "ts": 667918037693.735, "dur": 27.788, + "args": { + "External id": 248490,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918037725.159, "dur": 51.382, + "args": { + "External id": 248491,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918037727.532, "dur": 48.638, + "args": { + "External id": 248492,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918037734.927, "dur": 0.911, + "args": { + "External id": 248493,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 7324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918037737.709, "dur": 23.172, + "args": { + "External id": 248494,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667918037739.175, "dur": 21.407, + "args": { + "External id": 248495,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[65536], [], [], [], [], [], []], "Ev Idx": 7326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918037742.301, "dur": 3.388, + "args": { + "External id": 248496,"Record function id": 0, "Concrete Inputs": ["[65536]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918037746.762, "dur": 13.477, + "args": { + "External id": 248497,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[65536], [65536], []], "Ev Idx": 7328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 4183438, "tid": 4183438, + "ts": 667918037780.724, "dur": 16111.263, + "args": { + "External id": 248498,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 7329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 4183438, "tid": 4183438, + "ts": 667918037782.058, "dur": 16108.866, + "args": { + "External id": 248499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 7330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918053901.658, "dur": 6.324, + "args": { + "External id": 248500,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918053905.377, "dur": 0.910, + "args": { + "External id": 248501,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918053912.987, "dur": 104.749, + "args": { + "External id": 248502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918053914.923, "dur": 6.901, + "args": { + "External id": 248503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918053917.029, "dur": 3.797, + "args": { + "External id": 248504,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918053919.618, "dur": 0.935, + "args": { + "External id": 248505,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918053923.070, "dur": 93.870, + "args": { + "External id": 248506,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918053924.972, "dur": 90.955, + "args": { + "External id": 248507,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918054021.525, "dur": 3.878, + "args": { + "External id": 248508,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918054023.729, "dur": 0.663, + "args": { + "External id": 248509,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918054032.922, "dur": 2.909, + "args": { + "External id": 248510,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918054045.195, "dur": 6.344, + "args": { + "External id": 248511,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918054047.617, "dur": 3.616, + "args": { + "External id": 248512,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918054184.858, "dur": 220.619, + "args": { + "External id": 248513,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918054189.171, "dur": 14.742, + "args": { + "External id": 248514,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918054207.266, "dur": 197.592, + "args": { + "External id": 248515,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918054209.409, "dur": 0.919, + "args": { + "External id": 248516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918054212.229, "dur": 26.574, + "args": { + "External id": 248517,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918054241.000, "dur": 6.367, + "args": { + "External id": 248518,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918054246.257, "dur": 0.771, + "args": { + "External id": 248519,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918054248.574, "dur": 29.051, + "args": { + "External id": 248520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918054254.582, "dur": 1.679, + "args": { + "External id": 248521,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918054257.743, "dur": 19.552, + "args": { + "External id": 248522,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918054261.435, "dur": 3.557, + "args": { + "External id": 248523,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918054279.256, "dur": 22.532, + "args": { + "External id": 248524,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918054303.750, "dur": 17.479, + "args": { + "External id": 248525,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918054324.403, "dur": 15.662, + "args": { + "External id": 248526,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918054341.635, "dur": 13.819, + "args": { + "External id": 248527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918054357.387, "dur": 21.017, + "args": { + "External id": 248528,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918054359.787, "dur": 2.005, + "args": { + "External id": 248529,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918054364.227, "dur": 1.106, + "args": { + "External id": 248530,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918054380.259, "dur": 11.290, + "args": { + "External id": 248531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918054392.990, "dur": 11.034, + "args": { + "External id": 248532,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918054413.529, "dur": 2.819, + "args": { + "External id": 248533,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918054423.502, "dur": 4.712, + "args": { + "External id": 248534,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918054426.469, "dur": 0.792, + "args": { + "External id": 248535,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918054506.570, "dur": 59.780, + "args": { + "External id": 248536,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918054571.565, "dur": 6.619, + "args": { + "External id": 248537,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918054574.414, "dur": 1.100, + "args": { + "External id": 248538,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918054579.825, "dur": 27.689, + "args": { + "External id": 248539,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918054613.679, "dur": 7.816, + "args": { + "External id": 248540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918054615.867, "dur": 4.723, + "args": { + "External id": 248541,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918054618.732, "dur": 1.604, + "args": { + "External id": 248542,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918054624.958, "dur": 84.933, + "args": { + "External id": 248543,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918054626.429, "dur": 82.326, + "args": { + "External id": 248544,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918054716.319, "dur": 17.093, + "args": { + "External id": 248545,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918054740.666, "dur": 5.349, + "args": { + "External id": 248546,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918054743.508, "dur": 1.193, + "args": { + "External id": 248547,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "8388608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918054750.764, "dur": 58.440, + "args": { + "External id": 248548,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918054751.955, "dur": 5.225, + "args": { + "External id": 248549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918054753.483, "dur": 3.116, + "args": { + "External id": 248550,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918054755.307, "dur": 1.093, + "args": { + "External id": 248551,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918054757.989, "dur": 50.759, + "args": { + "External id": 248552,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918054758.876, "dur": 49.243, + "args": { + "External id": 248553,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918054813.572, "dur": 4.469, + "args": { + "External id": 248554,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918054815.993, "dur": 0.815, + "args": { + "External id": 248555,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918054824.843, "dur": 2.180, + "args": { + "External id": 248556,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918054835.990, "dur": 9.588, + "args": { + "External id": 248557,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918054838.606, "dur": 6.600, + "args": { + "External id": 248558,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918054943.501, "dur": 315.846, + "args": { + "External id": 248559,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918054945.757, "dur": 2.514, + "args": { + "External id": 248560,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918054949.987, "dur": 308.604, + "args": { + "External id": 248561,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918054952.166, "dur": 0.583, + "args": { + "External id": 248562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918054954.346, "dur": 26.989, + "args": { + "External id": 248563,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918054983.243, "dur": 4.029, + "args": { + "External id": 248564,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918054985.651, "dur": 1.328, + "args": { + "External id": 248565,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918054988.399, "dur": 27.460, + "args": { + "External id": 248566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918054989.779, "dur": 1.580, + "args": { + "External id": 248567,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918054992.786, "dur": 22.684, + "args": { + "External id": 248568,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918054998.511, "dur": 2.929, + "args": { + "External id": 248569,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918055017.413, "dur": 42.283, + "args": { + "External id": 248570,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918055061.383, "dur": 39.860, + "args": { + "External id": 248571,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918055103.756, "dur": 42.845, + "args": { + "External id": 248572,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918055148.138, "dur": 36.815, + "args": { + "External id": 248573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918055186.725, "dur": 42.483, + "args": { + "External id": 248574,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918055189.032, "dur": 2.019, + "args": { + "External id": 248575,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918055212.280, "dur": 0.823, + "args": { + "External id": 248576,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918055231.267, "dur": 12.447, + "args": { + "External id": 248577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918055245.087, "dur": 12.476, + "args": { + "External id": 248578,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918055267.423, "dur": 2.965, + "args": { + "External id": 248579,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918055279.773, "dur": 3.658, + "args": { + "External id": 248580,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918055281.995, "dur": 0.511, + "args": { + "External id": 248581,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918055353.802, "dur": 50.600, + "args": { + "External id": 248582,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918055408.757, "dur": 5.703, + "args": { + "External id": 248583,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918055411.734, "dur": 1.686, + "args": { + "External id": 248584,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "8388608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918055416.165, "dur": 25.352, + "args": { + "External id": 248585,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918055447.187, "dur": 5.887, + "args": { + "External id": 248586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918055448.691, "dur": 3.791, + "args": { + "External id": 248587,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918055451.001, "dur": 1.293, + "args": { + "External id": 248588,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918055456.067, "dur": 44.875, + "args": { + "External id": 248589,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918055457.507, "dur": 42.677, + "args": { + "External id": 248590,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918055504.681, "dur": 14.712, + "args": { + "External id": 248591,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918055525.107, "dur": 4.542, + "args": { + "External id": 248592,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918055527.737, "dur": 1.016, + "args": { + "External id": 248593,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918055533.847, "dur": 68.231, + "args": { + "External id": 248594,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918055535.212, "dur": 6.914, + "args": { + "External id": 248595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918055536.110, "dur": 5.427, + "args": { + "External id": 248596,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918055538.077, "dur": 3.262, + "args": { + "External id": 248597,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918055556.311, "dur": 45.314, + "args": { + "External id": 248598,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918055559.092, "dur": 42.040, + "args": { + "External id": 248599,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918055606.359, "dur": 4.203, + "args": { + "External id": 248600,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918055608.565, "dur": 0.912, + "args": { + "External id": 248601,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918055616.290, "dur": 2.128, + "args": { + "External id": 248602,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918055626.595, "dur": 6.594, + "args": { + "External id": 248603,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918055629.141, "dur": 3.767, + "args": { + "External id": 248604,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918055768.506, "dur": 200.080, + "args": { + "External id": 248605,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918055773.211, "dur": 3.182, + "args": { + "External id": 248606,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918055778.296, "dur": 189.605, + "args": { + "External id": 248607,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918055779.766, "dur": 0.693, + "args": { + "External id": 248608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918055781.871, "dur": 23.268, + "args": { + "External id": 248609,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918055807.180, "dur": 6.518, + "args": { + "External id": 248610,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918055812.295, "dur": 1.118, + "args": { + "External id": 248611,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918055814.791, "dur": 29.124, + "args": { + "External id": 248612,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918055816.267, "dur": 1.417, + "args": { + "External id": 248613,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918055819.511, "dur": 24.008, + "args": { + "External id": 248614,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918055822.586, "dur": 2.876, + "args": { + "External id": 248615,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918055845.701, "dur": 19.830, + "args": { + "External id": 248616,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918055867.373, "dur": 16.046, + "args": { + "External id": 248617,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918055885.926, "dur": 14.152, + "args": { + "External id": 248618,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918055901.891, "dur": 16.354, + "args": { + "External id": 248619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918055920.210, "dur": 20.642, + "args": { + "External id": 248620,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918055922.311, "dur": 1.562, + "args": { + "External id": 248621,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918055926.273, "dur": 0.873, + "args": { + "External id": 248622,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918055942.956, "dur": 11.415, + "args": { + "External id": 248623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918055955.791, "dur": 11.030, + "args": { + "External id": 248624,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918055979.751, "dur": 2.496, + "args": { + "External id": 248625,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918055991.878, "dur": 3.859, + "args": { + "External id": 248626,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918055994.302, "dur": 0.554, + "args": { + "External id": 248627,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918056062.898, "dur": 48.831, + "args": { + "External id": 248628,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918056116.139, "dur": 5.312, + "args": { + "External id": 248629,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918056119.183, "dur": 1.090, + "args": { + "External id": 248630,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918056123.353, "dur": 24.501, + "args": { + "External id": 248631,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918056152.400, "dur": 8.226, + "args": { + "External id": 248632,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918056154.425, "dur": 5.506, + "args": { + "External id": 248633,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918056156.777, "dur": 2.923, + "args": { + "External id": 248634,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918056163.660, "dur": 59.120, + "args": { + "External id": 248635,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918056164.887, "dur": 56.606, + "args": { + "External id": 248636,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918056228.607, "dur": 16.531, + "args": { + "External id": 248637,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918056251.613, "dur": 5.518, + "args": { + "External id": 248638,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918056254.880, "dur": 1.042, + "args": { + "External id": 248639,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "25165824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918056261.309, "dur": 54.420, + "args": { + "External id": 248640,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918056262.614, "dur": 5.337, + "args": { + "External id": 248641,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918056263.793, "dur": 3.425, + "args": { + "External id": 248642,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918056266.071, "dur": 0.956, + "args": { + "External id": 248643,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918056268.631, "dur": 46.713, + "args": { + "External id": 248644,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918056270.010, "dur": 44.722, + "args": { + "External id": 248645,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918056319.923, "dur": 3.963, + "args": { + "External id": 248646,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918056322.061, "dur": 0.803, + "args": { + "External id": 248647,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918056330.490, "dur": 1.608, + "args": { + "External id": 248648,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918056340.297, "dur": 8.065, + "args": { + "External id": 248649,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918056343.423, "dur": 4.643, + "args": { + "External id": 248650,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918056433.429, "dur": 176.259, + "args": { + "External id": 248651,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918056435.769, "dur": 2.669, + "args": { + "External id": 248652,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918056442.191, "dur": 166.948, + "args": { + "External id": 248653,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918056443.381, "dur": 0.280, + "args": { + "External id": 248654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918056445.218, "dur": 19.775, + "args": { + "External id": 248655,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918056466.582, "dur": 5.354, + "args": { + "External id": 248656,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918056468.413, "dur": 3.060, + "args": { + "External id": 248657,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918056473.253, "dur": 20.889, + "args": { + "External id": 248658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918056474.509, "dur": 1.584, + "args": { + "External id": 248659,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918056477.398, "dur": 16.366, + "args": { + "External id": 248660,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918056480.218, "dur": 2.463, + "args": { + "External id": 248661,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918056495.777, "dur": 19.609, + "args": { + "External id": 248662,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918056516.747, "dur": 11.963, + "args": { + "External id": 248663,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918056531.157, "dur": 13.122, + "args": { + "External id": 248664,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918056545.712, "dur": 11.575, + "args": { + "External id": 248665,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918056559.030, "dur": 21.462, + "args": { + "External id": 248666,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918056560.997, "dur": 1.714, + "args": { + "External id": 248667,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918056565.060, "dur": 2.819, + "args": { + "External id": 248668,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918056582.252, "dur": 13.325, + "args": { + "External id": 248669,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918056596.673, "dur": 10.991, + "args": { + "External id": 248670,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918056615.630, "dur": 1.552, + "args": { + "External id": 248671,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918056625.799, "dur": 3.253, + "args": { + "External id": 248672,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918056627.994, "dur": 0.302, + "args": { + "External id": 248673,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918056733.420, "dur": 48.844, + "args": { + "External id": 248674,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918056787.185, "dur": 5.900, + "args": { + "External id": 248675,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918056790.628, "dur": 0.970, + "args": { + "External id": 248676,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "25165824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918056794.974, "dur": 23.898, + "args": { + "External id": 248677,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918056824.045, "dur": 6.346, + "args": { + "External id": 248678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918056825.576, "dur": 4.200, + "args": { + "External id": 248679,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918056827.834, "dur": 1.754, + "args": { + "External id": 248680,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918056833.136, "dur": 41.547, + "args": { + "External id": 248681,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918056834.608, "dur": 39.559, + "args": { + "External id": 248682,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918056878.617, "dur": 14.424, + "args": { + "External id": 248683,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918056898.804, "dur": 4.324, + "args": { + "External id": 248684,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918056901.202, "dur": 1.065, + "args": { + "External id": 248685,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918056907.234, "dur": 48.763, + "args": { + "External id": 248686,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918056908.431, "dur": 4.108, + "args": { + "External id": 248687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918056909.229, "dur": 2.737, + "args": { + "External id": 248688,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918056910.870, "dur": 0.962, + "args": { + "External id": 248689,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918056913.599, "dur": 42.002, + "args": { + "External id": 248690,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918056914.415, "dur": 40.564, + "args": { + "External id": 248691,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918056960.647, "dur": 3.899, + "args": { + "External id": 248692,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918056962.554, "dur": 0.937, + "args": { + "External id": 248693,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918056970.226, "dur": 1.767, + "args": { + "External id": 248694,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918056979.862, "dur": 6.511, + "args": { + "External id": 248695,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918056982.239, "dur": 3.847, + "args": { + "External id": 248696,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918057070.326, "dur": 199.595, + "args": { + "External id": 248697,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918057074.968, "dur": 2.292, + "args": { + "External id": 248698,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918057079.147, "dur": 190.177, + "args": { + "External id": 248699,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918057080.394, "dur": 0.614, + "args": { + "External id": 248700,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918057082.359, "dur": 21.384, + "args": { + "External id": 248701,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918057105.332, "dur": 6.387, + "args": { + "External id": 248702,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918057110.601, "dur": 0.852, + "args": { + "External id": 248703,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918057112.551, "dur": 22.238, + "args": { + "External id": 248704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918057113.777, "dur": 1.706, + "args": { + "External id": 248705,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918057117.315, "dur": 17.212, + "args": { + "External id": 248706,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918057120.204, "dur": 2.274, + "args": { + "External id": 248707,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918057136.163, "dur": 18.589, + "args": { + "External id": 248708,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918057156.194, "dur": 15.209, + "args": { + "External id": 248709,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918057173.726, "dur": 13.579, + "args": { + "External id": 248710,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918057189.282, "dur": 29.005, + "args": { + "External id": 248711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918057221.686, "dur": 21.188, + "args": { + "External id": 248712,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918057223.796, "dur": 2.214, + "args": { + "External id": 248713,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918057228.215, "dur": 1.141, + "args": { + "External id": 248714,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918057244.776, "dur": 11.331, + "args": { + "External id": 248715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918057257.351, "dur": 10.787, + "args": { + "External id": 248716,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918057277.040, "dur": 2.831, + "args": { + "External id": 248717,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918057289.584, "dur": 3.929, + "args": { + "External id": 248718,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918057292.214, "dur": 0.544, + "args": { + "External id": 248719,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918057359.333, "dur": 47.634, + "args": { + "External id": 248720,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918057411.793, "dur": 5.268, + "args": { + "External id": 248721,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918057415.152, "dur": 1.032, + "args": { + "External id": 248722,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918057418.652, "dur": 23.662, + "args": { + "External id": 248723,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918057446.779, "dur": 6.246, + "args": { + "External id": 248724,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918057448.441, "dur": 3.920, + "args": { + "External id": 248725,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918057450.789, "dur": 1.339, + "args": { + "External id": 248726,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918057456.051, "dur": 45.300, + "args": { + "External id": 248727,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918057457.134, "dur": 43.559, + "args": { + "External id": 248728,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918057505.184, "dur": 14.106, + "args": { + "External id": 248729,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918057524.622, "dur": 4.262, + "args": { + "External id": 248730,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918057527.226, "dur": 0.830, + "args": { + "External id": 248731,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "41943040"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918057532.546, "dur": 49.863, + "args": { + "External id": 248732,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918057534.022, "dur": 4.222, + "args": { + "External id": 248733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918057534.830, "dur": 2.846, + "args": { + "External id": 248734,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918057536.640, "dur": 0.889, + "args": { + "External id": 248735,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918057539.007, "dur": 43.099, + "args": { + "External id": 248736,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918057540.119, "dur": 41.264, + "args": { + "External id": 248737,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918057586.455, "dur": 3.550, + "args": { + "External id": 248738,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918057588.396, "dur": 0.530, + "args": { + "External id": 248739,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918057595.372, "dur": 1.517, + "args": { + "External id": 248740,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918057604.690, "dur": 9.331, + "args": { + "External id": 248741,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918057607.283, "dur": 6.414, + "args": { + "External id": 248742,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918057738.056, "dur": 274.647, + "args": { + "External id": 248743,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918057740.798, "dur": 3.270, + "args": { + "External id": 248744,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918057745.974, "dur": 266.058, + "args": { + "External id": 248745,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918057747.714, "dur": 0.613, + "args": { + "External id": 248746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918057750.022, "dur": 26.301, + "args": { + "External id": 248747,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918057777.905, "dur": 4.062, + "args": { + "External id": 248748,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918057780.551, "dur": 1.095, + "args": { + "External id": 248749,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918057783.118, "dur": 34.787, + "args": { + "External id": 248750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918057784.559, "dur": 1.393, + "args": { + "External id": 248751,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918057787.226, "dur": 30.300, + "args": { + "External id": 248752,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918057792.980, "dur": 2.537, + "args": { + "External id": 248753,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918057819.666, "dur": 39.857, + "args": { + "External id": 248754,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918057861.213, "dur": 40.125, + "args": { + "External id": 248755,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918057904.200, "dur": 29.646, + "args": { + "External id": 248756,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918057935.402, "dur": 22.006, + "args": { + "External id": 248757,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918057959.114, "dur": 23.430, + "args": { + "External id": 248758,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918057961.292, "dur": 1.595, + "args": { + "External id": 248759,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918057965.476, "dur": 0.775, + "args": { + "External id": 248760,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918057984.301, "dur": 13.524, + "args": { + "External id": 248761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918057998.945, "dur": 11.816, + "args": { + "External id": 248762,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918058019.600, "dur": 2.127, + "args": { + "External id": 248763,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918058030.863, "dur": 3.516, + "args": { + "External id": 248764,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918058033.188, "dur": 0.338, + "args": { + "External id": 248765,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918058113.279, "dur": 51.277, + "args": { + "External id": 248766,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918058169.135, "dur": 7.190, + "args": { + "External id": 248767,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918058172.297, "dur": 2.914, + "args": { + "External id": 248768,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "41943040"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918058178.067, "dur": 42.863, + "args": { + "External id": 248769,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918058227.959, "dur": 6.810, + "args": { + "External id": 248770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918058229.711, "dur": 4.103, + "args": { + "External id": 248771,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918058232.281, "dur": 1.285, + "args": { + "External id": 248772,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918058237.810, "dur": 49.127, + "args": { + "External id": 248773,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918058239.389, "dur": 46.844, + "args": { + "External id": 248774,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918058290.971, "dur": 17.152, + "args": { + "External id": 248775,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918058314.268, "dur": 4.531, + "args": { + "External id": 248776,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918058316.864, "dur": 0.906, + "args": { + "External id": 248777,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918058323.060, "dur": 50.680, + "args": { + "External id": 248778,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918058324.374, "dur": 4.582, + "args": { + "External id": 248779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918058325.330, "dur": 2.894, + "args": { + "External id": 248780,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918058327.267, "dur": 0.829, + "args": { + "External id": 248781,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918058329.838, "dur": 43.559, + "args": { + "External id": 248782,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918058330.575, "dur": 42.223, + "args": { + "External id": 248783,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918058377.835, "dur": 4.331, + "args": { + "External id": 248784,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918058380.151, "dur": 0.939, + "args": { + "External id": 248785,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918058388.372, "dur": 1.879, + "args": { + "External id": 248786,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918058397.919, "dur": 13.293, + "args": { + "External id": 248787,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918058405.042, "dur": 5.865, + "args": { + "External id": 248788,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918058495.977, "dur": 231.914, + "args": { + "External id": 248789,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918058498.525, "dur": 2.327, + "args": { + "External id": 248790,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918058502.937, "dur": 224.175, + "args": { + "External id": 248791,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918058504.252, "dur": 0.475, + "args": { + "External id": 248792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918058506.066, "dur": 24.530, + "args": { + "External id": 248793,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918058532.109, "dur": 3.303, + "args": { + "External id": 248794,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918058534.198, "dur": 0.909, + "args": { + "External id": 248795,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918058536.481, "dur": 25.833, + "args": { + "External id": 248796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918058537.913, "dur": 1.558, + "args": { + "External id": 248797,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918058541.216, "dur": 20.685, + "args": { + "External id": 248798,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918058546.233, "dur": 2.569, + "args": { + "External id": 248799,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918058563.827, "dur": 19.840, + "args": { + "External id": 248800,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918058585.053, "dur": 14.586, + "args": { + "External id": 248801,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918058601.906, "dur": 13.967, + "args": { + "External id": 248802,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918058617.482, "dur": 16.097, + "args": { + "External id": 248803,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918058635.303, "dur": 59.209, + "args": { + "External id": 248804,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918058637.049, "dur": 1.613, + "args": { + "External id": 248805,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918058641.028, "dur": 1.004, + "args": { + "External id": 248806,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918058697.973, "dur": 14.668, + "args": { + "External id": 248807,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918058713.958, "dur": 11.873, + "args": { + "External id": 248808,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918058736.078, "dur": 2.704, + "args": { + "External id": 248809,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918058748.735, "dur": 4.353, + "args": { + "External id": 248810,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918058751.359, "dur": 0.801, + "args": { + "External id": 248811,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918058821.090, "dur": 48.164, + "args": { + "External id": 248812,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918058874.075, "dur": 5.347, + "args": { + "External id": 248813,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918058876.886, "dur": 1.458, + "args": { + "External id": 248814,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918058881.120, "dur": 24.665, + "args": { + "External id": 248815,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918058910.274, "dur": 5.807, + "args": { + "External id": 248816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918058912.113, "dur": 3.160, + "args": { + "External id": 248817,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918058914.183, "dur": 0.908, + "args": { + "External id": 248818,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918058918.768, "dur": 44.424, + "args": { + "External id": 248819,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918058920.308, "dur": 42.155, + "args": { + "External id": 248820,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918058967.102, "dur": 15.149, + "args": { + "External id": 248821,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918058987.877, "dur": 4.548, + "args": { + "External id": 248822,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918058990.279, "dur": 1.280, + "args": { + "External id": 248823,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "58720256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918058996.375, "dur": 51.500, + "args": { + "External id": 248824,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918058997.494, "dur": 6.167, + "args": { + "External id": 248825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918058998.243, "dur": 4.811, + "args": { + "External id": 248826,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918058999.916, "dur": 2.941, + "args": { + "External id": 248827,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918059004.378, "dur": 43.076, + "args": { + "External id": 248828,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918059005.588, "dur": 41.314, + "args": { + "External id": 248829,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918059051.858, "dur": 3.695, + "args": { + "External id": 248830,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918059053.751, "dur": 0.726, + "args": { + "External id": 248831,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918059061.181, "dur": 1.314, + "args": { + "External id": 248832,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918059102.759, "dur": 8.589, + "args": { + "External id": 248833,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918059106.139, "dur": 4.926, + "args": { + "External id": 248834,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918059189.908, "dur": 212.445, + "args": { + "External id": 248835,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918059209.800, "dur": 3.270, + "args": { + "External id": 248836,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918059214.824, "dur": 186.974, + "args": { + "External id": 248837,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918059216.087, "dur": 0.480, + "args": { + "External id": 248838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918059218.077, "dur": 24.230, + "args": { + "External id": 248839,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918059244.067, "dur": 5.786, + "args": { + "External id": 248840,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918059248.441, "dur": 0.900, + "args": { + "External id": 248841,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918059251.291, "dur": 22.007, + "args": { + "External id": 248842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918059252.465, "dur": 1.112, + "args": { + "External id": 248843,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918059254.694, "dur": 18.273, + "args": { + "External id": 248844,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918059257.340, "dur": 2.318, + "args": { + "External id": 248845,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918059275.073, "dur": 21.714, + "args": { + "External id": 248846,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918059298.353, "dur": 13.563, + "args": { + "External id": 248847,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918059314.659, "dur": 16.117, + "args": { + "External id": 248848,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918059332.444, "dur": 13.672, + "args": { + "External id": 248849,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918059347.852, "dur": 25.521, + "args": { + "External id": 248850,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918059351.774, "dur": 1.370, + "args": { + "External id": 248851,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918059355.663, "dur": 0.745, + "args": { + "External id": 248852,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918059375.187, "dur": 12.137, + "args": { + "External id": 248853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918059388.673, "dur": 12.192, + "args": { + "External id": 248854,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918059409.555, "dur": 2.089, + "args": { + "External id": 248855,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918059420.706, "dur": 3.495, + "args": { + "External id": 248856,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918059422.904, "dur": 0.342, + "args": { + "External id": 248857,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918059490.105, "dur": 46.915, + "args": { + "External id": 248858,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918059541.538, "dur": 5.364, + "args": { + "External id": 248859,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918059544.595, "dur": 1.292, + "args": { + "External id": 248860,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "58720256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918059548.274, "dur": 24.360, + "args": { + "External id": 248861,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918059577.233, "dur": 6.204, + "args": { + "External id": 248862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918059579.269, "dur": 3.513, + "args": { + "External id": 248863,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918059581.291, "dur": 1.251, + "args": { + "External id": 248864,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918059586.429, "dur": 42.441, + "args": { + "External id": 248865,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918059587.364, "dur": 40.913, + "args": { + "External id": 248866,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918059632.783, "dur": 16.036, + "args": { + "External id": 248867,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918059653.424, "dur": 66.293, + "args": { + "External id": 248868,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918059692.767, "dur": 26.560, + "args": { + "External id": 248869,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918059699.110, "dur": 1.033, + "args": { + "External id": 248870,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 7701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918059725.725, "dur": 37.912, + "args": { + "External id": 248871,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[32000, 1024], [], [], [], [], []], "Ev Idx": 7702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667918059732.308, "dur": 31.103, + "args": { + "External id": 248872,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], [], []], "Input Dims": [[32000, 1024], [], [], [], [], [], []], "Ev Idx": 7703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918059738.036, "dur": 4.572, + "args": { + "External id": 248873,"Record function id": 0, "Concrete Inputs": ["[32000, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918059743.861, "dur": 19.014, + "args": { + "External id": 248874,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 4183438, + "ts": 667918059777.358, "dur": 5.742, + "args": { + "External id": 248875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 7706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 4183438, + "ts": 667918059779.787, "dur": 3.015, + "args": { + "External id": 248876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 7707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 4183438, + "ts": 667918059784.046, "dur": 1.907, + "args": { + "External id": 248877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 4183438, + "ts": 667918059785.068, "dur": 0.703, + "args": { + "External id": 248878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918059825.444, "dur": 23.527, + "args": { + "External id": 248879,"Sequence number": 2987728, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 7710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918059851.384, "dur": 14.571, + "args": { + "External id": 248880,"Sequence number": 2987729, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 7711 + } + }, + { + "ph": "s", "id": 16, "pid": 4183438, "tid": 4183438, "ts": 667918059851.384, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918059872.340, "dur": 6.133, + "args": { + "External id": 248881,"Sequence number": 2987730, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 4, 4096], [], [], [], []], "Ev Idx": 7712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918059875.923, "dur": 1.199, + "args": { + "External id": 248882,"Record function id": 0, "Concrete Inputs": ["", "[16, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 7713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 4183438, "tid": 4183438, + "ts": 667918059880.968, "dur": 6.434, + "args": { + "External id": 248883,"Sequence number": 2987730, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[16, 4, 4096], [], []], "Ev Idx": 7714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918059885.364, "dur": 0.765, + "args": { + "External id": 248884,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 7715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918059888.631, "dur": 3.120, + "args": { + "External id": 248885,"Sequence number": 2987730, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 7716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918059890.519, "dur": 0.657, + "args": { + "External id": 248886,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 7717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918059895.532, "dur": 5.992, + "args": { + "External id": 248887,"Sequence number": 2987730, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], []], "Ev Idx": 7718 + } + }, + { + "ph": "s", "id": 15, "pid": 4183438, "tid": 4183438, "ts": 667918059895.532, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918059899.264, "dur": 0.949, + "args": { + "External id": 248888,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "[16777216, 4096, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 7719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918059902.606, "dur": 5.625, + "args": { + "External id": 248889,"Sequence number": 2987731, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], []], "Ev Idx": 7720 + } + }, + { + "ph": "s", "id": 14, "pid": 4183438, "tid": 4183438, "ts": 667918059902.606, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918059906.436, "dur": 1.041, + "args": { + "External id": 248890,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "[16777216, 4096, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 7721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 4183438, "tid": 4183438, + "ts": 667918059909.306, "dur": 5.400, + "args": { + "External id": 248891,"Sequence number": 2987732, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], []], "Input Dims": [[16, 4096, 4, 1024], [], []], "Ev Idx": 7722 + } + }, + { + "ph": "s", "id": 13, "pid": 4183438, "tid": 4183438, "ts": 667918059909.306, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918059913.167, "dur": 0.750, + "args": { + "External id": 248892,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[16777216, 4096, 1]", "2048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 7723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918059915.659, "dur": 3.887, + "args": { + "External id": 248893,"Sequence number": 2987733, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], [], []], "Ev Idx": 7724 + } + }, + { + "ph": "s", "id": 12, "pid": 4183438, "tid": 4183438, "ts": 667918059915.659, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918059918.124, "dur": 0.741, + "args": { + "External id": 248894,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[16777216, 4096, 1]", "2048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], []], "Ev Idx": 7725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 4183438, "tid": 4183438, + "ts": 667918059923.385, "dur": 33.319, + "args": { + "External id": 248895,"Sequence number": 2987734, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 4183438, + "ts": 667918059924.730, "dur": 31.757, + "args": { + "External id": 248896,"Sequence number": 2987734, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918059927.403, "dur": 9.153, + "args": { + "External id": 248897,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 7728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918059930.049, "dur": 5.831, + "args": { + "External id": 248898,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918059937.480, "dur": 18.428, + "args": { + "External id": 248899,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 7730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918059983.761, "dur": 4.228, + "args": { + "External id": 248900,"Sequence number": 2987734, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 7731 + } + }, + { + "ph": "s", "id": 11, "pid": 4183438, "tid": 4183438, "ts": 667918059983.761, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918059990.784, "dur": 1.370, + "args": { + "External id": 248901,"Sequence number": 2987735, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918060023.528, "dur": 21967.304, + "args": { + "External id": 248902,"Sequence number": 2987735, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536], [32000, 1024], [], [], [], [], []], "Ev Idx": 7733 + } + }, + { + "ph": "s", "id": 10, "pid": 4183438, "tid": 4183438, "ts": 667918060023.528, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 4183438, "tid": 4183438, + "ts": 667918060037.060, "dur": 25.433, + "args": { + "External id": 248903,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 7734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 4183438, + "ts": 667918060037.983, "dur": 24.298, + "args": { + "External id": 248904,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 7735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918060039.225, "dur": 5.775, + "args": { + "External id": 248905,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 7736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918060040.981, "dur": 3.573, + "args": { + "External id": 248906,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918060045.642, "dur": 16.191, + "args": { + "External id": 248907,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [4096, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 7738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 4183438, "tid": 4183438, + "ts": 667918060077.687, "dur": 25.908, + "args": { + "External id": 248908,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 7739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918060078.972, "dur": 6.357, + "args": { + "External id": 248909,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 7740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918060081.193, "dur": 3.828, + "args": { + "External id": 248910,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 4183438, + "ts": 667918060086.772, "dur": 16.608, + "args": { + "External id": 248911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 7742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918060088.556, "dur": 14.379, + "args": { + "External id": 248912,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 7743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 4183438, "tid": 4183438, + "ts": 667918060106.993, "dur": 22.476, + "args": { + "External id": 248913,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[32000, 1024], [], [], [], [], []], "Ev Idx": 7744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918060107.912, "dur": 4.843, + "args": { + "External id": 248914,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[32000, 1024], [], [], [], [], []], "Ev Idx": 7745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918060109.611, "dur": 2.909, + "args": { + "External id": 248915,"Record function id": 0, "Concrete Inputs": ["[32000, 1024]", "[1024, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 4183438, + "ts": 667918060113.386, "dur": 15.867, + "args": { + "External id": 248916,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918060114.278, "dur": 14.579, + "args": { + "External id": 248917,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1024, 1], []], "Input Dims": [[32000, 1024], []], "Ev Idx": 7748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 4183438, + "ts": 667918060135.468, "dur": 17.522, + "args": { + "External id": 248918,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 7749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918060137.109, "dur": 2.970, + "args": { + "External id": 248919,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 4183438, + "ts": 667918060140.675, "dur": 12.032, + "args": { + "External id": 248920,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 7751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918060141.257, "dur": 11.141, + "args": { + "External id": 248921,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 4183438, "tid": 4183438, + "ts": 667918060157.525, "dur": 30.687, + "args": { + "External id": 248922,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918060191.099, "dur": 71.937, + "args": { + "External id": 248923,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918060209.305, "dur": 53.295, + "args": { + "External id": 248924,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918060215.047, "dur": 1.102, + "args": { + "External id": 248925,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 7756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918060217.901, "dur": 26.935, + "args": { + "External id": 248926,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667918060221.977, "dur": 22.659, + "args": { + "External id": 248927,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[65536], [], [], [], [], [], []], "Ev Idx": 7758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918060224.433, "dur": 2.794, + "args": { + "External id": 248928,"Record function id": 0, "Concrete Inputs": ["[65536]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918060228.430, "dur": 15.759, + "args": { + "External id": 248929,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[65536], [65536], []], "Ev Idx": 7760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 4183438, "tid": 4183438, + "ts": 667918060267.198, "dur": 15883.640, + "args": { + "External id": 248930,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 7761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 4183438, "tid": 4183438, + "ts": 667918060268.732, "dur": 15881.026, + "args": { + "External id": 248931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 7762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918076160.686, "dur": 6.955, + "args": { + "External id": 248932,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918076165.040, "dur": 1.071, + "args": { + "External id": 248933,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918076173.571, "dur": 115.367, + "args": { + "External id": 248934,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918076175.243, "dur": 6.583, + "args": { + "External id": 248935,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918076177.506, "dur": 3.380, + "args": { + "External id": 248936,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918076179.671, "dur": 0.985, + "args": { + "External id": 248937,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918076183.239, "dur": 104.898, + "args": { + "External id": 248938,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918076185.002, "dur": 101.981, + "args": { + "External id": 248939,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918076294.057, "dur": 5.100, + "args": { + "External id": 248940,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918076296.789, "dur": 0.840, + "args": { + "External id": 248941,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918076307.223, "dur": 2.313, + "args": { + "External id": 248942,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918076318.705, "dur": 6.608, + "args": { + "External id": 248943,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918076321.080, "dur": 3.931, + "args": { + "External id": 248944,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918076459.923, "dur": 259.038, + "args": { + "External id": 248945,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918076462.617, "dur": 3.960, + "args": { + "External id": 248946,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918076468.658, "dur": 249.633, + "args": { + "External id": 248947,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918076470.305, "dur": 0.809, + "args": { + "External id": 248948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918076473.115, "dur": 27.782, + "args": { + "External id": 248949,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918076503.013, "dur": 5.755, + "args": { + "External id": 248950,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918076507.742, "dur": 0.711, + "args": { + "External id": 248951,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918076510.119, "dur": 26.665, + "args": { + "External id": 248952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918076511.465, "dur": 1.499, + "args": { + "External id": 248953,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918076514.361, "dur": 22.091, + "args": { + "External id": 248954,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918076518.697, "dur": 3.345, + "args": { + "External id": 248955,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918076538.680, "dur": 24.238, + "args": { + "External id": 248956,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918076564.819, "dur": 14.927, + "args": { + "External id": 248957,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918076582.410, "dur": 18.632, + "args": { + "External id": 248958,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918076602.968, "dur": 14.266, + "args": { + "External id": 248959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918076619.127, "dur": 23.687, + "args": { + "External id": 248960,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918076621.473, "dur": 1.565, + "args": { + "External id": 248961,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918076625.450, "dur": 2.899, + "args": { + "External id": 248962,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918076644.716, "dur": 55.788, + "args": { + "External id": 248963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918076703.162, "dur": 13.536, + "args": { + "External id": 248964,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918076728.000, "dur": 2.975, + "args": { + "External id": 248965,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918076738.180, "dur": 4.911, + "args": { + "External id": 248966,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918076741.043, "dur": 1.136, + "args": { + "External id": 248967,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918076824.134, "dur": 59.836, + "args": { + "External id": 248968,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918076889.241, "dur": 6.914, + "args": { + "External id": 248969,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918076892.333, "dur": 1.111, + "args": { + "External id": 248970,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918076897.834, "dur": 25.167, + "args": { + "External id": 248971,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918076928.482, "dur": 7.221, + "args": { + "External id": 248972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918076930.593, "dur": 4.262, + "args": { + "External id": 248973,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918076932.883, "dur": 1.723, + "args": { + "External id": 248974,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918076938.869, "dur": 46.404, + "args": { + "External id": 248975,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918076940.066, "dur": 44.497, + "args": { + "External id": 248976,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918076989.328, "dur": 15.527, + "args": { + "External id": 248977,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918077010.791, "dur": 4.440, + "args": { + "External id": 248978,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918077013.182, "dur": 0.906, + "args": { + "External id": 248979,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "8388608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918077019.624, "dur": 51.080, + "args": { + "External id": 248980,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918077020.663, "dur": 4.378, + "args": { + "External id": 248981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918077021.570, "dur": 2.858, + "args": { + "External id": 248982,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918077023.351, "dur": 0.923, + "args": { + "External id": 248983,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918077025.648, "dur": 44.559, + "args": { + "External id": 248984,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918077026.367, "dur": 43.212, + "args": { + "External id": 248985,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918077074.748, "dur": 4.665, + "args": { + "External id": 248986,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918077077.143, "dur": 0.840, + "args": { + "External id": 248987,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918077085.741, "dur": 1.502, + "args": { + "External id": 248988,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918077095.936, "dur": 6.892, + "args": { + "External id": 248989,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918077098.286, "dur": 4.260, + "args": { + "External id": 248990,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918077211.469, "dur": 320.711, + "args": { + "External id": 248991,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918077216.445, "dur": 3.243, + "args": { + "External id": 248992,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918077221.409, "dur": 310.261, + "args": { + "External id": 248993,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918077223.165, "dur": 0.426, + "args": { + "External id": 248994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918077225.103, "dur": 27.099, + "args": { + "External id": 248995,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918077254.158, "dur": 6.038, + "args": { + "External id": 248996,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918077258.859, "dur": 1.027, + "args": { + "External id": 248997,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918077261.452, "dur": 41.043, + "args": { + "External id": 248998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918077262.822, "dur": 1.798, + "args": { + "External id": 248999,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918077276.803, "dur": 25.385, + "args": { + "External id": 249000,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918077280.062, "dur": 3.031, + "args": { + "External id": 249001,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918077304.040, "dur": 39.736, + "args": { + "External id": 249002,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918077345.348, "dur": 46.594, + "args": { + "External id": 249003,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918077394.929, "dur": 42.092, + "args": { + "External id": 249004,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918077438.478, "dur": 33.129, + "args": { + "External id": 249005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918077473.692, "dur": 27.871, + "args": { + "External id": 249006,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918077475.771, "dur": 1.824, + "args": { + "External id": 249007,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918077480.034, "dur": 0.882, + "args": { + "External id": 249008,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918077503.055, "dur": 14.292, + "args": { + "External id": 249009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918077518.767, "dur": 12.030, + "args": { + "External id": 249010,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918077539.278, "dur": 2.492, + "args": { + "External id": 249011,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918077550.663, "dur": 4.224, + "args": { + "External id": 249012,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918077553.575, "dur": 0.484, + "args": { + "External id": 249013,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918077626.567, "dur": 82.688, + "args": { + "External id": 249014,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918077716.214, "dur": 6.186, + "args": { + "External id": 249015,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918077719.618, "dur": 0.987, + "args": { + "External id": 249016,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "8388608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918077723.764, "dur": 24.629, + "args": { + "External id": 249017,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918077753.627, "dur": 6.751, + "args": { + "External id": 249018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918077755.847, "dur": 3.904, + "args": { + "External id": 249019,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918077758.279, "dur": 1.271, + "args": { + "External id": 249020,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918077763.538, "dur": 46.982, + "args": { + "External id": 249021,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918077764.917, "dur": 44.904, + "args": { + "External id": 249022,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918077814.201, "dur": 15.007, + "args": { + "External id": 249023,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918077834.493, "dur": 4.496, + "args": { + "External id": 249024,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918077836.821, "dur": 1.229, + "args": { + "External id": 249025,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918077842.955, "dur": 47.888, + "args": { + "External id": 249026,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918077843.947, "dur": 4.165, + "args": { + "External id": 249027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918077845.011, "dur": 2.508, + "args": { + "External id": 249028,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918077846.552, "dur": 0.813, + "args": { + "External id": 249029,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918077848.748, "dur": 41.732, + "args": { + "External id": 249030,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918077849.613, "dur": 40.329, + "args": { + "External id": 249031,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918077895.026, "dur": 4.419, + "args": { + "External id": 249032,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918077897.369, "dur": 0.917, + "args": { + "External id": 249033,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918077905.830, "dur": 1.910, + "args": { + "External id": 249034,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918077915.853, "dur": 8.830, + "args": { + "External id": 249035,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918077918.596, "dur": 5.752, + "args": { + "External id": 249036,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918078013.038, "dur": 209.705, + "args": { + "External id": 249037,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918078015.451, "dur": 1.965, + "args": { + "External id": 249038,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918078019.013, "dur": 203.109, + "args": { + "External id": 249039,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918078020.785, "dur": 0.280, + "args": { + "External id": 249040,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918078022.588, "dur": 23.453, + "args": { + "External id": 249041,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918078047.494, "dur": 3.603, + "args": { + "External id": 249042,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918078049.779, "dur": 0.987, + "args": { + "External id": 249043,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918078051.943, "dur": 24.349, + "args": { + "External id": 249044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918078053.092, "dur": 2.082, + "args": { + "External id": 249045,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918078056.232, "dur": 19.603, + "args": { + "External id": 249046,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918078061.205, "dur": 2.293, + "args": { + "External id": 249047,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918078077.736, "dur": 19.279, + "args": { + "External id": 249048,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918078098.700, "dur": 13.355, + "args": { + "External id": 249049,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918078114.397, "dur": 14.813, + "args": { + "External id": 249050,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918078130.393, "dur": 13.570, + "args": { + "External id": 249051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918078145.708, "dur": 21.217, + "args": { + "External id": 249052,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918078147.633, "dur": 1.615, + "args": { + "External id": 249053,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918078151.412, "dur": 0.927, + "args": { + "External id": 249054,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918078168.672, "dur": 19.569, + "args": { + "External id": 249055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918078189.448, "dur": 30.843, + "args": { + "External id": 249056,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918078231.145, "dur": 2.443, + "args": { + "External id": 249057,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918078243.055, "dur": 3.920, + "args": { + "External id": 249058,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918078245.718, "dur": 0.440, + "args": { + "External id": 249059,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918078313.950, "dur": 48.032, + "args": { + "External id": 249060,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918078366.255, "dur": 5.584, + "args": { + "External id": 249061,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918078369.211, "dur": 1.408, + "args": { + "External id": 249062,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918078373.106, "dur": 25.030, + "args": { + "External id": 249063,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918078402.399, "dur": 5.869, + "args": { + "External id": 249064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918078404.282, "dur": 3.310, + "args": { + "External id": 249065,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918078406.559, "dur": 0.842, + "args": { + "External id": 249066,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918078410.608, "dur": 41.598, + "args": { + "External id": 249067,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918078411.960, "dur": 39.466, + "args": { + "External id": 249068,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918078455.874, "dur": 16.343, + "args": { + "External id": 249069,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918078477.704, "dur": 3.552, + "args": { + "External id": 249070,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918078479.863, "dur": 0.515, + "args": { + "External id": 249071,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "25165824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918078485.215, "dur": 50.502, + "args": { + "External id": 249072,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918078486.072, "dur": 6.277, + "args": { + "External id": 249073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918078486.966, "dur": 4.821, + "args": { + "External id": 249074,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918078488.368, "dur": 3.237, + "args": { + "External id": 249075,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918078493.126, "dur": 41.992, + "args": { + "External id": 249076,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918078493.902, "dur": 40.445, + "args": { + "External id": 249077,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918078539.797, "dur": 3.458, + "args": { + "External id": 249078,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918078541.559, "dur": 0.510, + "args": { + "External id": 249079,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918078548.709, "dur": 1.432, + "args": { + "External id": 249080,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918078557.929, "dur": 6.510, + "args": { + "External id": 249081,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918078560.178, "dur": 3.988, + "args": { + "External id": 249082,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918078643.873, "dur": 218.255, + "args": { + "External id": 249083,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918078646.286, "dur": 2.489, + "args": { + "External id": 249084,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918078650.453, "dur": 211.233, + "args": { + "External id": 249085,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918078687.098, "dur": 0.345, + "args": { + "External id": 249086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918078691.296, "dur": 25.680, + "args": { + "External id": 249087,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918078718.922, "dur": 3.650, + "args": { + "External id": 249088,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918078721.221, "dur": 1.014, + "args": { + "External id": 249089,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918078723.547, "dur": 23.820, + "args": { + "External id": 249090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918078724.661, "dur": 3.770, + "args": { + "External id": 249091,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918078729.684, "dur": 17.322, + "args": { + "External id": 249092,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918078732.487, "dur": 2.955, + "args": { + "External id": 249093,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918078748.983, "dur": 20.316, + "args": { + "External id": 249094,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918078770.996, "dur": 13.114, + "args": { + "External id": 249095,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918078787.085, "dur": 13.548, + "args": { + "External id": 249096,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918078801.915, "dur": 11.905, + "args": { + "External id": 249097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918078815.521, "dur": 19.892, + "args": { + "External id": 249098,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918078817.371, "dur": 1.711, + "args": { + "External id": 249099,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918078821.774, "dur": 0.890, + "args": { + "External id": 249100,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918078837.087, "dur": 11.367, + "args": { + "External id": 249101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918078849.617, "dur": 10.709, + "args": { + "External id": 249102,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918078869.600, "dur": 2.620, + "args": { + "External id": 249103,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918078881.688, "dur": 4.000, + "args": { + "External id": 249104,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918078884.077, "dur": 0.609, + "args": { + "External id": 249105,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918078957.827, "dur": 49.944, + "args": { + "External id": 249106,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918079012.614, "dur": 5.104, + "args": { + "External id": 249107,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918079015.488, "dur": 1.051, + "args": { + "External id": 249108,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "25165824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918079019.291, "dur": 24.526, + "args": { + "External id": 249109,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918079048.420, "dur": 8.533, + "args": { + "External id": 249110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918079050.297, "dur": 5.809, + "args": { + "External id": 249111,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918079052.622, "dur": 3.210, + "args": { + "External id": 249112,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918079059.525, "dur": 43.984, + "args": { + "External id": 249113,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918079060.716, "dur": 42.136, + "args": { + "External id": 249114,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918079107.562, "dur": 14.442, + "args": { + "External id": 249115,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918079127.397, "dur": 4.173, + "args": { + "External id": 249116,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918079129.910, "dur": 0.670, + "args": { + "External id": 249117,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918079135.640, "dur": 82.321, + "args": { + "External id": 249118,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918079136.528, "dur": 4.559, + "args": { + "External id": 249119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918079137.455, "dur": 3.045, + "args": { + "External id": 249120,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918079139.073, "dur": 1.060, + "args": { + "External id": 249121,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918079154.408, "dur": 62.997, + "args": { + "External id": 249122,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918079155.404, "dur": 61.088, + "args": { + "External id": 249123,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918079224.244, "dur": 5.316, + "args": { + "External id": 249124,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918079226.930, "dur": 1.257, + "args": { + "External id": 249125,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918079236.205, "dur": 1.848, + "args": { + "External id": 249126,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918079246.802, "dur": 9.352, + "args": { + "External id": 249127,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918079249.232, "dur": 6.570, + "args": { + "External id": 249128,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918079346.813, "dur": 175.925, + "args": { + "External id": 249129,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918079349.365, "dur": 2.254, + "args": { + "External id": 249130,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918079353.186, "dur": 168.892, + "args": { + "External id": 249131,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918079354.961, "dur": 0.296, + "args": { + "External id": 249132,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918079356.533, "dur": 23.631, + "args": { + "External id": 249133,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918079381.664, "dur": 3.232, + "args": { + "External id": 249134,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918079383.883, "dur": 0.754, + "args": { + "External id": 249135,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918079385.968, "dur": 23.042, + "args": { + "External id": 249136,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918079387.021, "dur": 1.613, + "args": { + "External id": 249137,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918079390.123, "dur": 18.577, + "args": { + "External id": 249138,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918079395.274, "dur": 2.098, + "args": { + "External id": 249139,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918079410.688, "dur": 19.371, + "args": { + "External id": 249140,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918079431.894, "dur": 12.983, + "args": { + "External id": 249141,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918079447.359, "dur": 14.013, + "args": { + "External id": 249142,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918079462.532, "dur": 12.233, + "args": { + "External id": 249143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918079476.325, "dur": 19.585, + "args": { + "External id": 249144,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918079478.193, "dur": 2.013, + "args": { + "External id": 249145,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918079482.774, "dur": 1.042, + "args": { + "External id": 249146,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918079497.564, "dur": 11.040, + "args": { + "External id": 249147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918079509.882, "dur": 11.244, + "args": { + "External id": 249148,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918079528.566, "dur": 1.693, + "args": { + "External id": 249149,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918079538.400, "dur": 3.402, + "args": { + "External id": 249150,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918079540.493, "dur": 0.507, + "args": { + "External id": 249151,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918079603.310, "dur": 44.543, + "args": { + "External id": 249152,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 7983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918079687.403, "dur": 7.080, + "args": { + "External id": 249153,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918079690.690, "dur": 2.072, + "args": { + "External id": 249154,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918079695.965, "dur": 26.541, + "args": { + "External id": 249155,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 7986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918079728.663, "dur": 6.144, + "args": { + "External id": 249156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918079730.596, "dur": 3.427, + "args": { + "External id": 249157,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918079732.709, "dur": 1.083, + "args": { + "External id": 249158,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918079737.885, "dur": 51.361, + "args": { + "External id": 249159,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918079739.115, "dur": 49.545, + "args": { + "External id": 249160,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 7991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918079793.266, "dur": 15.329, + "args": { + "External id": 249161,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 7992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918079814.518, "dur": 3.940, + "args": { + "External id": 249162,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 7993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918079816.481, "dur": 1.049, + "args": { + "External id": 249163,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "41943040"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 7994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918079822.584, "dur": 51.571, + "args": { + "External id": 249164,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 7995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918079823.940, "dur": 6.561, + "args": { + "External id": 249165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 7996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918079824.848, "dur": 5.080, + "args": { + "External id": 249166,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 7997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918079826.981, "dur": 2.613, + "args": { + "External id": 249167,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 7998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918079831.299, "dur": 42.394, + "args": { + "External id": 249168,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 7999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918079832.043, "dur": 41.071, + "args": { + "External id": 249169,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918079877.971, "dur": 4.351, + "args": { + "External id": 249170,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918079880.124, "dur": 1.036, + "args": { + "External id": 249171,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918079889.003, "dur": 1.805, + "args": { + "External id": 249172,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918079898.775, "dur": 7.254, + "args": { + "External id": 249173,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918079901.284, "dur": 4.447, + "args": { + "External id": 249174,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918079994.289, "dur": 318.301, + "args": { + "External id": 249175,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918079996.851, "dur": 2.531, + "args": { + "External id": 249176,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918080001.108, "dur": 310.893, + "args": { + "External id": 249177,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918080002.298, "dur": 0.446, + "args": { + "External id": 249178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918080005.704, "dur": 22.229, + "args": { + "External id": 249179,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918080029.627, "dur": 3.243, + "args": { + "External id": 249180,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918080031.957, "dur": 0.643, + "args": { + "External id": 249181,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918080034.113, "dur": 26.245, + "args": { + "External id": 249182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918080035.374, "dur": 3.673, + "args": { + "External id": 249183,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918080040.186, "dur": 19.881, + "args": { + "External id": 249184,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918080042.737, "dur": 2.450, + "args": { + "External id": 249185,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918080061.789, "dur": 31.747, + "args": { + "External id": 249186,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918080095.153, "dur": 43.068, + "args": { + "External id": 249187,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918080140.773, "dur": 43.091, + "args": { + "External id": 249188,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918080185.219, "dur": 42.769, + "args": { + "External id": 249189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918080230.782, "dur": 32.942, + "args": { + "External id": 249190,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918080232.890, "dur": 2.004, + "args": { + "External id": 249191,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918080237.292, "dur": 0.860, + "args": { + "External id": 249192,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918080265.470, "dur": 26.433, + "args": { + "External id": 249193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918080293.063, "dur": 17.666, + "args": { + "External id": 249194,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918080320.058, "dur": 2.233, + "args": { + "External id": 249195,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918080330.741, "dur": 4.227, + "args": { + "External id": 249196,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918080333.456, "dur": 0.608, + "args": { + "External id": 249197,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918080402.904, "dur": 46.990, + "args": { + "External id": 249198,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 8029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918080454.068, "dur": 5.276, + "args": { + "External id": 249199,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918080457.085, "dur": 1.060, + "args": { + "External id": 249200,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "41943040"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918080460.573, "dur": 22.902, + "args": { + "External id": 249201,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 8032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918080488.271, "dur": 8.450, + "args": { + "External id": 249202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918080489.892, "dur": 6.108, + "args": { + "External id": 249203,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918080492.187, "dur": 3.616, + "args": { + "External id": 249204,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918080498.877, "dur": 44.181, + "args": { + "External id": 249205,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918080500.057, "dur": 42.055, + "args": { + "External id": 249206,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918080546.875, "dur": 15.648, + "args": { + "External id": 249207,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 8038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918080567.856, "dur": 3.886, + "args": { + "External id": 249208,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918080570.158, "dur": 0.766, + "args": { + "External id": 249209,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918080575.716, "dur": 55.410, + "args": { + "External id": 249210,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 8041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918080577.040, "dur": 4.625, + "args": { + "External id": 249211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 8042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918080578.247, "dur": 2.887, + "args": { + "External id": 249212,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 8043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918080580.224, "dur": 0.753, + "args": { + "External id": 249213,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 8044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918080582.541, "dur": 48.154, + "args": { + "External id": 249214,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918080583.427, "dur": 46.594, + "args": { + "External id": 249215,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918080635.164, "dur": 3.420, + "args": { + "External id": 249216,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918080636.924, "dur": 0.433, + "args": { + "External id": 249217,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918080643.756, "dur": 1.696, + "args": { + "External id": 249218,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918080700.252, "dur": 8.534, + "args": { + "External id": 249219,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918080703.117, "dur": 5.209, + "args": { + "External id": 249220,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918080795.706, "dur": 180.492, + "args": { + "External id": 249221,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918080798.216, "dur": 2.319, + "args": { + "External id": 249222,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918080804.465, "dur": 171.216, + "args": { + "External id": 249223,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918080806.097, "dur": 0.369, + "args": { + "External id": 249224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918080807.653, "dur": 23.024, + "args": { + "External id": 249225,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918080832.012, "dur": 6.130, + "args": { + "External id": 249226,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918080834.902, "dur": 2.760, + "args": { + "External id": 249227,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918080839.227, "dur": 21.959, + "args": { + "External id": 249228,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918080840.400, "dur": 1.953, + "args": { + "External id": 249229,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918080843.724, "dur": 17.204, + "args": { + "External id": 249230,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918080846.389, "dur": 2.773, + "args": { + "External id": 249231,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918080862.567, "dur": 20.222, + "args": { + "External id": 249232,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918080884.446, "dur": 12.698, + "args": { + "External id": 249233,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918080899.284, "dur": 14.101, + "args": { + "External id": 249234,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918080914.736, "dur": 12.144, + "args": { + "External id": 249235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918080928.648, "dur": 21.099, + "args": { + "External id": 249236,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918080930.634, "dur": 1.783, + "args": { + "External id": 249237,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918080934.378, "dur": 3.362, + "args": { + "External id": 249238,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918080951.506, "dur": 10.939, + "args": { + "External id": 249239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918080963.736, "dur": 10.793, + "args": { + "External id": 249240,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918080982.545, "dur": 2.512, + "args": { + "External id": 249241,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918080994.669, "dur": 3.632, + "args": { + "External id": 249242,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918080996.925, "dur": 0.571, + "args": { + "External id": 249243,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918081063.178, "dur": 46.650, + "args": { + "External id": 249244,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 8075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918081127.088, "dur": 5.449, + "args": { + "External id": 249245,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918081130.413, "dur": 1.086, + "args": { + "External id": 249246,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918081133.848, "dur": 22.459, + "args": { + "External id": 249247,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 8078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918081161.179, "dur": 8.526, + "args": { + "External id": 249248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918081163.248, "dur": 5.843, + "args": { + "External id": 249249,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918081165.758, "dur": 3.152, + "args": { + "External id": 249250,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918081172.249, "dur": 59.646, + "args": { + "External id": 249251,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918081173.528, "dur": 57.156, + "args": { + "External id": 249252,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918081237.170, "dur": 15.036, + "args": { + "External id": 249253,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 8084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918081258.657, "dur": 4.395, + "args": { + "External id": 249254,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918081261.292, "dur": 0.723, + "args": { + "External id": 249255,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "58720256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918081266.930, "dur": 50.995, + "args": { + "External id": 249256,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 8087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918081267.796, "dur": 4.761, + "args": { + "External id": 249257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 8088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918081269.118, "dur": 2.881, + "args": { + "External id": 249258,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 8089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918081271.011, "dur": 0.840, + "args": { + "External id": 249259,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 8090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918081273.407, "dur": 44.115, + "args": { + "External id": 249260,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918081274.456, "dur": 42.358, + "args": { + "External id": 249261,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918081321.985, "dur": 3.376, + "args": { + "External id": 249262,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918081323.635, "dur": 0.654, + "args": { + "External id": 249263,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918081331.819, "dur": 1.728, + "args": { + "External id": 249264,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918081341.040, "dur": 7.886, + "args": { + "External id": 249265,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918081343.771, "dur": 4.866, + "args": { + "External id": 249266,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918081430.339, "dur": 171.103, + "args": { + "External id": 249267,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918081432.290, "dur": 1.977, + "args": { + "External id": 249268,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918081438.758, "dur": 162.241, + "args": { + "External id": 249269,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918081439.927, "dur": 0.552, + "args": { + "External id": 249270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918081441.450, "dur": 19.898, + "args": { + "External id": 249271,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918081462.932, "dur": 4.613, + "args": { + "External id": 249272,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918081464.650, "dur": 2.627, + "args": { + "External id": 249273,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918081468.644, "dur": 22.128, + "args": { + "External id": 249274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918081470.314, "dur": 1.446, + "args": { + "External id": 249275,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918081473.041, "dur": 17.309, + "args": { + "External id": 249276,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918081475.697, "dur": 2.520, + "args": { + "External id": 249277,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918081492.323, "dur": 17.972, + "args": { + "External id": 249278,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918081511.614, "dur": 12.800, + "args": { + "External id": 249279,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918081526.943, "dur": 12.594, + "args": { + "External id": 249280,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918081540.640, "dur": 11.971, + "args": { + "External id": 249281,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918081554.325, "dur": 20.918, + "args": { + "External id": 249282,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918081556.160, "dur": 1.599, + "args": { + "External id": 249283,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918081560.017, "dur": 3.368, + "args": { + "External id": 249284,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918081576.698, "dur": 10.911, + "args": { + "External id": 249285,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918081588.740, "dur": 10.756, + "args": { + "External id": 249286,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918081607.801, "dur": 1.778, + "args": { + "External id": 249287,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918081617.234, "dur": 3.377, + "args": { + "External id": 249288,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918081619.330, "dur": 0.399, + "args": { + "External id": 249289,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918081718.489, "dur": 45.963, + "args": { + "External id": 249290,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 8121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918081769.388, "dur": 6.114, + "args": { + "External id": 249291,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918081772.874, "dur": 1.033, + "args": { + "External id": 249292,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "58720256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918081776.949, "dur": 23.077, + "args": { + "External id": 249293,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 8124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918081804.720, "dur": 6.621, + "args": { + "External id": 249294,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918081806.679, "dur": 3.984, + "args": { + "External id": 249295,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918081809.082, "dur": 1.360, + "args": { + "External id": 249296,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918081813.649, "dur": 45.144, + "args": { + "External id": 249297,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918081815.196, "dur": 42.948, + "args": { + "External id": 249298,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918081862.190, "dur": 17.067, + "args": { + "External id": 249299,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 8130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918081884.334, "dur": 25.604, + "args": { + "External id": 249300,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 8131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918081887.230, "dur": 22.359, + "args": { + "External id": 249301,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918081892.520, "dur": 1.089, + "args": { + "External id": 249302,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918081915.266, "dur": 29.026, + "args": { + "External id": 249303,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[32000, 1024], [], [], [], [], []], "Ev Idx": 8134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667918081917.328, "dur": 26.725, + "args": { + "External id": 249304,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], [], []], "Input Dims": [[32000, 1024], [], [], [], [], [], []], "Ev Idx": 8135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918081922.729, "dur": 4.482, + "args": { + "External id": 249305,"Record function id": 0, "Concrete Inputs": ["[32000, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918081928.450, "dur": 14.985, + "args": { + "External id": 249306,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 8137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 4183438, + "ts": 667918081955.313, "dur": 5.630, + "args": { + "External id": 249307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 8138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 4183438, + "ts": 667918081957.889, "dur": 2.755, + "args": { + "External id": 249308,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 8139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 4183438, + "ts": 667918081962.097, "dur": 1.595, + "args": { + "External id": 249309,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 8140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 4183438, + "ts": 667918081962.954, "dur": 0.643, + "args": { + "External id": 249310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 8141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918082007.986, "dur": 24.599, + "args": { + "External id": 249311,"Sequence number": 2987736, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 8142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918082034.895, "dur": 15.797, + "args": { + "External id": 249312,"Sequence number": 2987737, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 8143 + } + }, + { + "ph": "s", "id": 9, "pid": 4183438, "tid": 4183438, "ts": 667918082034.895, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918082056.736, "dur": 5.196, + "args": { + "External id": 249313,"Sequence number": 2987738, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 4, 4096], [], [], [], []], "Ev Idx": 8144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918082059.630, "dur": 0.762, + "args": { + "External id": 249314,"Record function id": 0, "Concrete Inputs": ["", "[16, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 8145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 4183438, "tid": 4183438, + "ts": 667918082064.523, "dur": 6.863, + "args": { + "External id": 249315,"Sequence number": 2987738, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "3"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[16, 4, 4096], [], []], "Ev Idx": 8146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918082069.169, "dur": 1.021, + "args": { + "External id": 249316,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 8147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918082072.819, "dur": 3.233, + "args": { + "External id": 249317,"Sequence number": 2987738, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 8148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918082074.746, "dur": 0.700, + "args": { + "External id": 249318,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 8149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918082079.935, "dur": 7.021, + "args": { + "External id": 249319,"Sequence number": 2987738, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], []], "Ev Idx": 8150 + } + }, + { + "ph": "s", "id": 8, "pid": 4183438, "tid": 4183438, "ts": 667918082079.935, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918082084.563, "dur": 1.146, + "args": { + "External id": 249320,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "[16777216, 4096, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 8151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918082088.330, "dur": 5.254, + "args": { + "External id": 249321,"Sequence number": 2987739, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], [], []], "Ev Idx": 8152 + } + }, + { + "ph": "s", "id": 7, "pid": 4183438, "tid": 4183438, "ts": 667918082088.330, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918082091.944, "dur": 0.877, + "args": { + "External id": 249322,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 1024]", "[16777216, 4096, 1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 8153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 4183438, "tid": 4183438, + "ts": 667918082094.692, "dur": 5.435, + "args": { + "External id": 249323,"Sequence number": 2987740, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], []], "Input Dims": [[16, 4096, 4, 1024], [], []], "Ev Idx": 8154 + } + }, + { + "ph": "s", "id": 6, "pid": 4183438, "tid": 4183438, "ts": 667918082094.692, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918082098.332, "dur": 0.922, + "args": { + "External id": 249324,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[16777216, 4096, 1]", "3072"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1024, 1], [], [], []], "Input Dims": [[16, 4096, 4, 1024], [], [], []], "Ev Idx": 8155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918082101.159, "dur": 4.437, + "args": { + "External id": 249325,"Sequence number": 2987741, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], [], []], "Ev Idx": 8156 + } + }, + { + "ph": "s", "id": 5, "pid": 4183438, "tid": 4183438, "ts": 667918082101.159, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918082103.787, "dur": 1.093, + "args": { + "External id": 249326,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1024]", "[16777216, 4096, 1]", "3072"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[16, 4096, 1024], [], [], []], "Ev Idx": 8157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 4183438, "tid": 4183438, + "ts": 667918082109.738, "dur": 29.682, + "args": { + "External id": 249327,"Sequence number": 2987742, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 8158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 4183438, + "ts": 667918082111.162, "dur": 28.057, + "args": { + "External id": 249328,"Sequence number": 2987742, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 8159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918082113.851, "dur": 6.931, + "args": { + "External id": 249329,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 8160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918082116.151, "dur": 4.058, + "args": { + "External id": 249330,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918082121.687, "dur": 17.051, + "args": { + "External id": 249331,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 8162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918082167.878, "dur": 6.864, + "args": { + "External id": 249332,"Sequence number": 2987742, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[16, 4096, 1024], []], "Ev Idx": 8163 + } + }, + { + "ph": "s", "id": 4, "pid": 4183438, "tid": 4183438, "ts": 667918082167.878, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918082176.891, "dur": 1.195, + "args": { + "External id": 249333,"Sequence number": 2987743, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 8164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 4183438, "tid": 4183438, + "ts": 667918082229.626, "dur": 23109.286, + "args": { + "External id": 249334,"Sequence number": 2987743, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [65536], [32000, 1024], [], [], [], [], []], "Ev Idx": 8165 + } + }, + { + "ph": "s", "id": 3, "pid": 4183438, "tid": 4183438, "ts": 667918082229.626, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 4183438, "tid": 4183438, + "ts": 667918082244.743, "dur": 33.191, + "args": { + "External id": 249335,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 8166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 4183438, + "ts": 667918082246.024, "dur": 31.702, + "args": { + "External id": 249336,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 8167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918082247.663, "dur": 7.198, + "args": { + "External id": 249337,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 8168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918082249.724, "dur": 4.519, + "args": { + "External id": 249338,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918082255.554, "dur": 21.714, + "args": { + "External id": 249339,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [4096, 1], []], "Input Dims": [[65536, 1024], [65536, 1024], []], "Ev Idx": 8170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 4183438, "tid": 4183438, + "ts": 667918082294.343, "dur": 27.472, + "args": { + "External id": 249340,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 8171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918082295.578, "dur": 6.669, + "args": { + "External id": 249341,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], [], []], "Ev Idx": 8172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918082297.836, "dur": 4.085, + "args": { + "External id": 249342,"Record function id": 0, "Concrete Inputs": ["[65536, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 4183438, + "ts": 667918082303.779, "dur": 17.826, + "args": { + "External id": 249343,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 8174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918082305.535, "dur": 15.624, + "args": { + "External id": 249344,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], []], "Input Dims": [[65536, 1024], []], "Ev Idx": 8175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 4183438, "tid": 4183438, + "ts": 667918082360.019, "dur": 21.132, + "args": { + "External id": 249345,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[32000, 1024], [], [], [], [], []], "Ev Idx": 8176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918082360.673, "dur": 5.145, + "args": { + "External id": 249346,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[32000, 1024], [], [], [], [], []], "Ev Idx": 8177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918082362.601, "dur": 2.999, + "args": { + "External id": 249347,"Record function id": 0, "Concrete Inputs": ["[32000, 1024]", "[1024, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 4183438, + "ts": 667918082366.353, "dur": 14.615, + "args": { + "External id": 249348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 8179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918082366.707, "dur": 13.949, + "args": { + "External id": 249349,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1024, 1], []], "Input Dims": [[32000, 1024], []], "Ev Idx": 8180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 4183438, + "ts": 667918082387.453, "dur": 20.083, + "args": { + "External id": 249350,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 8181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918082391.478, "dur": 2.789, + "args": { + "External id": 249351,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 4183438, + "ts": 667918082394.961, "dur": 12.323, + "args": { + "External id": 249352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 8183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918082395.433, "dur": 11.511, + "args": { + "External id": 249353,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 8184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 4183438, "tid": 4183438, + "ts": 667918082412.362, "dur": 22.146, + "args": { + "External id": 249354,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 8185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918082437.341, "dur": 51.505, + "args": { + "External id": 249355,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 8186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918082439.155, "dur": 49.273, + "args": { + "External id": 249356,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918082443.182, "dur": 1.034, + "args": { + "External id": 249357,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918082445.799, "dur": 25.685, + "args": { + "External id": 249358,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667918082447.289, "dur": 23.975, + "args": { + "External id": 249359,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[65536], [], [], [], [], [], []], "Ev Idx": 8190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918082451.674, "dur": 2.746, + "args": { + "External id": 249360,"Record function id": 0, "Concrete Inputs": ["[65536]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918082455.380, "dur": 15.505, + "args": { + "External id": 249361,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[65536], [65536], []], "Ev Idx": 8192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 4183438, "tid": 4183438, + "ts": 667918082493.082, "dur": 17096.153, + "args": { + "External id": 249362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 8193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 4183438, "tid": 4183438, + "ts": 667918082494.385, "dur": 17093.734, + "args": { + "External id": 249363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 8194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918099601.557, "dur": 7.323, + "args": { + "External id": 249364,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918099605.983, "dur": 1.023, + "args": { + "External id": 249365,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918099614.566, "dur": 135.752, + "args": { + "External id": 249366,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 8197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918099616.112, "dur": 6.739, + "args": { + "External id": 249367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 8198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918099618.459, "dur": 3.431, + "args": { + "External id": 249368,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 8199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918099620.675, "dur": 0.945, + "args": { + "External id": 249369,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 8200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918099624.108, "dur": 125.483, + "args": { + "External id": 249370,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918099626.188, "dur": 122.134, + "args": { + "External id": 249371,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918099755.547, "dur": 5.541, + "args": { + "External id": 249372,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918099758.717, "dur": 0.769, + "args": { + "External id": 249373,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918099768.661, "dur": 2.356, + "args": { + "External id": 249374,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918099783.998, "dur": 7.364, + "args": { + "External id": 249375,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918099786.319, "dur": 4.728, + "args": { + "External id": 249376,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918099926.197, "dur": 211.056, + "args": { + "External id": 249377,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918099931.416, "dur": 2.209, + "args": { + "External id": 249378,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918099935.690, "dur": 201.106, + "args": { + "External id": 249379,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918099937.350, "dur": 0.482, + "args": { + "External id": 249380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918099939.711, "dur": 26.567, + "args": { + "External id": 249381,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918099968.155, "dur": 5.423, + "args": { + "External id": 249382,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918099972.386, "dur": 0.822, + "args": { + "External id": 249383,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918099974.683, "dur": 26.584, + "args": { + "External id": 249384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918099976.216, "dur": 1.574, + "args": { + "External id": 249385,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918099979.279, "dur": 21.639, + "args": { + "External id": 249386,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918099982.885, "dur": 3.027, + "args": { + "External id": 249387,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918100002.973, "dur": 25.639, + "args": { + "External id": 249388,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918100030.473, "dur": 17.086, + "args": { + "External id": 249389,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918100050.283, "dur": 16.539, + "args": { + "External id": 249390,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918100068.429, "dur": 14.622, + "args": { + "External id": 249391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918100085.114, "dur": 22.563, + "args": { + "External id": 249392,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918100087.522, "dur": 1.694, + "args": { + "External id": 249393,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918100091.708, "dur": 0.871, + "args": { + "External id": 249394,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918100109.406, "dur": 13.047, + "args": { + "External id": 249395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918100123.692, "dur": 12.259, + "args": { + "External id": 249396,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918100143.853, "dur": 2.344, + "args": { + "External id": 249397,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918100152.330, "dur": 4.061, + "args": { + "External id": 249398,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918100154.517, "dur": 0.891, + "args": { + "External id": 249399,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918100248.499, "dur": 63.112, + "args": { + "External id": 249400,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 8231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918100317.102, "dur": 14.863, + "args": { + "External id": 249401,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918100327.321, "dur": 1.088, + "args": { + "External id": 249402,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918100333.771, "dur": 28.838, + "args": { + "External id": 249403,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 8234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918100368.583, "dur": 8.945, + "args": { + "External id": 249404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918100370.580, "dur": 5.948, + "args": { + "External id": 249405,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918100373.006, "dur": 3.214, + "args": { + "External id": 249406,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918100381.206, "dur": 50.833, + "args": { + "External id": 249407,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918100382.590, "dur": 48.732, + "args": { + "External id": 249408,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918100436.090, "dur": 17.006, + "args": { + "External id": 249409,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 8240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918100459.523, "dur": 4.483, + "args": { + "External id": 249410,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918100461.986, "dur": 1.113, + "args": { + "External id": 249411,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "8388608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918100468.258, "dur": 51.701, + "args": { + "External id": 249412,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 8243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918100469.309, "dur": 4.788, + "args": { + "External id": 249413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 8244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918100470.305, "dur": 2.902, + "args": { + "External id": 249414,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 8245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918100471.979, "dur": 1.068, + "args": { + "External id": 249415,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 8246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918100475.079, "dur": 44.512, + "args": { + "External id": 249416,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918100475.880, "dur": 43.139, + "args": { + "External id": 249417,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918100524.111, "dur": 4.110, + "args": { + "External id": 249418,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918100526.361, "dur": 0.711, + "args": { + "External id": 249419,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918100534.594, "dur": 1.750, + "args": { + "External id": 249420,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918100544.591, "dur": 7.333, + "args": { + "External id": 249421,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918100547.369, "dur": 4.282, + "args": { + "External id": 249422,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918100646.842, "dur": 392.880, + "args": { + "External id": 249423,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918100648.756, "dur": 2.220, + "args": { + "External id": 249424,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918100705.271, "dur": 333.833, + "args": { + "External id": 249425,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918100707.067, "dur": 0.345, + "args": { + "External id": 249426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918100709.281, "dur": 27.091, + "args": { + "External id": 249427,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918100738.028, "dur": 5.799, + "args": { + "External id": 249428,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918100740.563, "dur": 2.890, + "args": { + "External id": 249429,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918100744.916, "dur": 23.919, + "args": { + "External id": 249430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918100746.530, "dur": 1.367, + "args": { + "External id": 249431,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918100749.225, "dur": 19.315, + "args": { + "External id": 249432,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918100752.232, "dur": 3.000, + "args": { + "External id": 249433,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918100770.575, "dur": 29.455, + "args": { + "External id": 249434,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918100802.015, "dur": 35.758, + "args": { + "External id": 249435,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918100840.453, "dur": 44.354, + "args": { + "External id": 249436,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918100886.591, "dur": 42.673, + "args": { + "External id": 249437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918100931.417, "dur": 43.098, + "args": { + "External id": 249438,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918100933.472, "dur": 2.240, + "args": { + "External id": 249439,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918100938.057, "dur": 2.923, + "args": { + "External id": 249440,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918100976.201, "dur": 31.831, + "args": { + "External id": 249441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918101009.385, "dur": 28.874, + "args": { + "External id": 249442,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918101048.230, "dur": 3.100, + "args": { + "External id": 249443,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918101060.720, "dur": 4.280, + "args": { + "External id": 249444,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918101063.316, "dur": 0.524, + "args": { + "External id": 249445,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918101138.831, "dur": 51.169, + "args": { + "External id": 249446,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 8277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918101209.316, "dur": 6.945, + "args": { + "External id": 249447,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918101212.966, "dur": 1.373, + "args": { + "External id": 249448,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "8388608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918101217.832, "dur": 25.878, + "args": { + "External id": 249449,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 8280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918101259.121, "dur": 8.281, + "args": { + "External id": 249450,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918101261.627, "dur": 5.022, + "args": { + "External id": 249451,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918101263.669, "dur": 2.769, + "args": { + "External id": 249452,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918101270.698, "dur": 49.762, + "args": { + "External id": 249453,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918101272.202, "dur": 47.580, + "args": { + "External id": 249454,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918101324.645, "dur": 16.644, + "args": { + "External id": 249455,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 8286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918101347.542, "dur": 4.165, + "args": { + "External id": 249456,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918101350.098, "dur": 0.714, + "args": { + "External id": 249457,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918101355.546, "dur": 47.653, + "args": { + "External id": 249458,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 8289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918101356.627, "dur": 3.397, + "args": { + "External id": 249459,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 8290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918101357.087, "dur": 2.335, + "args": { + "External id": 249460,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 8291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918101358.744, "dur": 0.529, + "args": { + "External id": 249461,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 8292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918101360.715, "dur": 42.089, + "args": { + "External id": 249462,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918101361.439, "dur": 40.774, + "args": { + "External id": 249463,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918101407.360, "dur": 3.851, + "args": { + "External id": 249464,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918101409.057, "dur": 0.776, + "args": { + "External id": 249465,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918101417.326, "dur": 2.096, + "args": { + "External id": 249466,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918101428.039, "dur": 6.676, + "args": { + "External id": 249467,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918101430.521, "dur": 3.910, + "args": { + "External id": 249468,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918101522.468, "dur": 222.606, + "args": { + "External id": 249469,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918101525.140, "dur": 2.610, + "args": { + "External id": 249470,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918101531.785, "dur": 212.639, + "args": { + "External id": 249471,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918101533.331, "dur": 0.323, + "args": { + "External id": 249472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918101534.924, "dur": 20.054, + "args": { + "External id": 249473,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918101556.801, "dur": 5.682, + "args": { + "External id": 249474,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918101559.034, "dur": 2.954, + "args": { + "External id": 249475,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918101563.432, "dur": 21.926, + "args": { + "External id": 249476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918101564.685, "dur": 2.041, + "args": { + "External id": 249477,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918101567.965, "dur": 16.996, + "args": { + "External id": 249478,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918101570.536, "dur": 2.673, + "args": { + "External id": 249479,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918101589.458, "dur": 18.525, + "args": { + "External id": 249480,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918101609.383, "dur": 16.098, + "args": { + "External id": 249481,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918101627.725, "dur": 13.412, + "args": { + "External id": 249482,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918101642.626, "dur": 48.008, + "args": { + "External id": 249483,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918101694.205, "dur": 22.046, + "args": { + "External id": 249484,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918101696.480, "dur": 2.093, + "args": { + "External id": 249485,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918101700.808, "dur": 1.065, + "args": { + "External id": 249486,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918101718.169, "dur": 12.188, + "args": { + "External id": 249487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918101731.719, "dur": 11.443, + "args": { + "External id": 249488,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918101753.269, "dur": 2.597, + "args": { + "External id": 249489,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918101765.105, "dur": 4.358, + "args": { + "External id": 249490,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918101767.943, "dur": 0.483, + "args": { + "External id": 249491,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918101841.547, "dur": 46.681, + "args": { + "External id": 249492,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 8323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918101892.891, "dur": 5.361, + "args": { + "External id": 249493,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918101895.976, "dur": 0.944, + "args": { + "External id": 249494,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918101899.780, "dur": 22.945, + "args": { + "External id": 249495,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 8326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918101927.566, "dur": 6.420, + "args": { + "External id": 249496,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918101929.084, "dur": 4.185, + "args": { + "External id": 249497,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918101931.318, "dur": 1.726, + "args": { + "External id": 249498,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918101936.499, "dur": 42.585, + "args": { + "External id": 249499,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918101937.544, "dur": 40.832, + "args": { + "External id": 249500,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918101983.022, "dur": 13.981, + "args": { + "External id": 249501,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 8332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918102002.507, "dur": 3.905, + "args": { + "External id": 249502,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918102004.620, "dur": 0.910, + "args": { + "External id": 249503,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "25165824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918102010.281, "dur": 48.408, + "args": { + "External id": 249504,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 8335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918102011.372, "dur": 4.189, + "args": { + "External id": 249505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 8336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918102012.006, "dur": 3.024, + "args": { + "External id": 249506,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 8337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918102014.154, "dur": 0.731, + "args": { + "External id": 249507,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 8338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918102016.331, "dur": 42.030, + "args": { + "External id": 249508,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918102016.821, "dur": 40.952, + "args": { + "External id": 249509,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918102062.525, "dur": 3.687, + "args": { + "External id": 249510,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918102064.315, "dur": 0.645, + "args": { + "External id": 249511,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918102071.482, "dur": 1.598, + "args": { + "External id": 249512,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918102080.761, "dur": 8.724, + "args": { + "External id": 249513,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918102082.881, "dur": 6.285, + "args": { + "External id": 249514,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918102169.567, "dur": 192.699, + "args": { + "External id": 249515,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918102171.676, "dur": 2.117, + "args": { + "External id": 249516,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918102175.338, "dur": 186.172, + "args": { + "External id": 249517,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918102177.022, "dur": 0.503, + "args": { + "External id": 249518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918102178.828, "dur": 38.158, + "args": { + "External id": 249519,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918102219.672, "dur": 3.579, + "args": { + "External id": 249520,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918102222.059, "dur": 0.849, + "args": { + "External id": 249521,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918102224.346, "dur": 25.618, + "args": { + "External id": 249522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918102225.417, "dur": 2.341, + "args": { + "External id": 249523,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918102229.004, "dur": 20.605, + "args": { + "External id": 249524,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918102234.549, "dur": 2.772, + "args": { + "External id": 249525,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918102251.466, "dur": 20.407, + "args": { + "External id": 249526,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918102273.332, "dur": 12.166, + "args": { + "External id": 249527,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918102287.983, "dur": 14.319, + "args": { + "External id": 249528,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918102303.667, "dur": 11.657, + "args": { + "External id": 249529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918102317.073, "dur": 19.057, + "args": { + "External id": 249530,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918102319.229, "dur": 1.684, + "args": { + "External id": 249531,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918102323.067, "dur": 0.932, + "args": { + "External id": 249532,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918102337.604, "dur": 10.660, + "args": { + "External id": 249533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918102349.587, "dur": 10.714, + "args": { + "External id": 249534,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918102369.423, "dur": 2.508, + "args": { + "External id": 249535,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918102380.693, "dur": 3.757, + "args": { + "External id": 249536,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918102383.273, "dur": 0.424, + "args": { + "External id": 249537,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918102447.904, "dur": 42.468, + "args": { + "External id": 249538,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 8369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918102494.801, "dur": 5.279, + "args": { + "External id": 249539,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918102497.670, "dur": 1.231, + "args": { + "External id": 249540,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "25165824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918102501.467, "dur": 23.105, + "args": { + "External id": 249541,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 8372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918102529.152, "dur": 6.091, + "args": { + "External id": 249542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918102530.861, "dur": 3.706, + "args": { + "External id": 249543,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918102533.259, "dur": 1.137, + "args": { + "External id": 249544,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918102537.728, "dur": 43.682, + "args": { + "External id": 249545,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918102538.673, "dur": 42.004, + "args": { + "External id": 249546,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918102585.148, "dur": 13.802, + "args": { + "External id": 249547,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 8378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918102604.608, "dur": 3.843, + "args": { + "External id": 249548,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918102606.851, "dur": 0.742, + "args": { + "External id": 249549,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918102612.365, "dur": 89.622, + "args": { + "External id": 249550,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 8381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918102613.605, "dur": 5.868, + "args": { + "External id": 249551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 8382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918102614.499, "dur": 4.356, + "args": { + "External id": 249552,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 8383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918102615.913, "dur": 2.773, + "args": { + "External id": 249553,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 8384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918102620.118, "dur": 81.263, + "args": { + "External id": 249554,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918102620.783, "dur": 79.581, + "args": { + "External id": 249555,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918102708.342, "dur": 4.859, + "args": { + "External id": 249556,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918102710.883, "dur": 0.904, + "args": { + "External id": 249557,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918102719.324, "dur": 1.868, + "args": { + "External id": 249558,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918102729.675, "dur": 7.304, + "args": { + "External id": 249559,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918102732.304, "dur": 4.405, + "args": { + "External id": 249560,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918102829.523, "dur": 175.490, + "args": { + "External id": 249561,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918102832.056, "dur": 2.108, + "args": { + "External id": 249562,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918102835.981, "dur": 168.522, + "args": { + "External id": 249563,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918102837.152, "dur": 0.695, + "args": { + "External id": 249564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918102841.589, "dur": 21.639, + "args": { + "External id": 249565,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918102865.120, "dur": 3.534, + "args": { + "External id": 249566,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918102867.352, "dur": 1.043, + "args": { + "External id": 249567,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918102869.578, "dur": 23.592, + "args": { + "External id": 249568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918102870.911, "dur": 3.440, + "args": { + "External id": 249569,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918102875.875, "dur": 16.920, + "args": { + "External id": 249570,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918102878.646, "dur": 2.422, + "args": { + "External id": 249571,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918102894.851, "dur": 18.764, + "args": { + "External id": 249572,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918102915.075, "dur": 12.863, + "args": { + "External id": 249573,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918102930.438, "dur": 12.948, + "args": { + "External id": 249574,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918102944.873, "dur": 12.034, + "args": { + "External id": 249575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918102958.670, "dur": 18.678, + "args": { + "External id": 249576,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918102960.791, "dur": 1.497, + "args": { + "External id": 249577,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918102964.720, "dur": 0.960, + "args": { + "External id": 249578,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918102979.088, "dur": 11.856, + "args": { + "External id": 249579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918102992.361, "dur": 10.938, + "args": { + "External id": 249580,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918103010.821, "dur": 1.640, + "args": { + "External id": 249581,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918103020.975, "dur": 3.511, + "args": { + "External id": 249582,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918103023.325, "dur": 0.358, + "args": { + "External id": 249583,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918103087.938, "dur": 44.592, + "args": { + "External id": 249584,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 8415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918103137.050, "dur": 5.283, + "args": { + "External id": 249585,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918103139.611, "dur": 1.512, + "args": { + "External id": 249586,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918103143.783, "dur": 21.593, + "args": { + "External id": 249587,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 8418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918103170.042, "dur": 7.478, + "args": { + "External id": 249588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918103171.695, "dur": 5.118, + "args": { + "External id": 249589,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918103173.678, "dur": 2.940, + "args": { + "External id": 249590,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918103179.818, "dur": 60.344, + "args": { + "External id": 249591,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918103180.849, "dur": 58.276, + "args": { + "External id": 249592,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918103245.464, "dur": 15.646, + "args": { + "External id": 249593,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 8424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918103266.912, "dur": 4.410, + "args": { + "External id": 249594,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918103269.266, "dur": 1.057, + "args": { + "External id": 249595,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "41943040"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918103275.115, "dur": 50.150, + "args": { + "External id": 249596,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 8427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918103276.149, "dur": 4.306, + "args": { + "External id": 249597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 8428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918103276.919, "dur": 2.976, + "args": { + "External id": 249598,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 8429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918103278.518, "dur": 1.117, + "args": { + "External id": 249599,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 8430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918103281.239, "dur": 43.629, + "args": { + "External id": 249600,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918103282.250, "dur": 42.025, + "args": { + "External id": 249601,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918103329.202, "dur": 3.790, + "args": { + "External id": 249602,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918103331.099, "dur": 0.866, + "args": { + "External id": 249603,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918103339.024, "dur": 1.427, + "args": { + "External id": 249604,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918103348.264, "dur": 7.113, + "args": { + "External id": 249605,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918103350.780, "dur": 4.330, + "args": { + "External id": 249606,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918103441.027, "dur": 177.872, + "args": { + "External id": 249607,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918103443.458, "dur": 1.983, + "args": { + "External id": 249608,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918103449.485, "dur": 168.869, + "args": { + "External id": 249609,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918103450.886, "dur": 0.349, + "args": { + "External id": 249610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918103452.457, "dur": 19.860, + "args": { + "External id": 249611,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918103473.904, "dur": 6.356, + "args": { + "External id": 249612,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918103476.456, "dur": 3.470, + "args": { + "External id": 249613,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918103481.250, "dur": 21.247, + "args": { + "External id": 249614,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918103482.115, "dur": 1.400, + "args": { + "External id": 249615,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918103484.613, "dur": 17.541, + "args": { + "External id": 249616,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918103487.526, "dur": 2.913, + "args": { + "External id": 249617,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918103504.057, "dur": 18.989, + "args": { + "External id": 249618,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918103524.390, "dur": 12.718, + "args": { + "External id": 249619,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918103539.684, "dur": 13.396, + "args": { + "External id": 249620,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918103554.291, "dur": 10.952, + "args": { + "External id": 249621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918103566.755, "dur": 22.973, + "args": { + "External id": 249622,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918103568.678, "dur": 2.011, + "args": { + "External id": 249623,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918103572.834, "dur": 2.899, + "args": { + "External id": 249624,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918103591.661, "dur": 12.835, + "args": { + "External id": 249625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918103605.447, "dur": 11.965, + "args": { + "External id": 249626,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918103625.042, "dur": 1.465, + "args": { + "External id": 249627,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918103649.099, "dur": 40.038, + "args": { + "External id": 249628,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918103686.553, "dur": 0.902, + "args": { + "External id": 249629,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918103763.910, "dur": 52.474, + "args": { + "External id": 249630,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 8461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918103821.437, "dur": 6.048, + "args": { + "External id": 249631,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918103824.655, "dur": 1.530, + "args": { + "External id": 249632,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "41943040"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918103828.782, "dur": 28.227, + "args": { + "External id": 249633,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 8464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918103862.092, "dur": 7.680, + "args": { + "External id": 249634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918103863.776, "dur": 5.364, + "args": { + "External id": 249635,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918103866.052, "dur": 2.857, + "args": { + "External id": 249636,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918103872.343, "dur": 57.202, + "args": { + "External id": 249637,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918103873.467, "dur": 55.395, + "args": { + "External id": 249638,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918103933.231, "dur": 20.471, + "args": { + "External id": 249639,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 8470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918103959.019, "dur": 4.095, + "args": { + "External id": 249640,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918103961.676, "dur": 0.708, + "args": { + "External id": 249641,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918103966.947, "dur": 53.765, + "args": { + "External id": 249642,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 8473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918103967.868, "dur": 4.078, + "args": { + "External id": 249643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 8474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918103968.749, "dur": 2.593, + "args": { + "External id": 249644,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 8475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918103970.449, "dur": 0.728, + "args": { + "External id": 249645,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 8476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918103972.494, "dur": 47.714, + "args": { + "External id": 249646,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918103973.182, "dur": 46.533, + "args": { + "External id": 249647,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918104024.567, "dur": 3.590, + "args": { + "External id": 249648,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918104026.305, "dur": 0.696, + "args": { + "External id": 249649,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918104034.152, "dur": 1.709, + "args": { + "External id": 249650,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918104043.930, "dur": 6.885, + "args": { + "External id": 249651,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918104046.212, "dur": 4.334, + "args": { + "External id": 249652,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918104129.865, "dur": 206.045, + "args": { + "External id": 249653,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918104131.702, "dur": 2.165, + "args": { + "External id": 249654,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918104137.979, "dur": 197.247, + "args": { + "External id": 249655,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918104139.313, "dur": 0.596, + "args": { + "External id": 249656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918104141.513, "dur": 22.063, + "args": { + "External id": 249657,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918104165.627, "dur": 5.046, + "args": { + "External id": 249658,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918104167.825, "dur": 2.531, + "args": { + "External id": 249659,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918104171.663, "dur": 38.350, + "args": { + "External id": 249660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918104172.909, "dur": 1.471, + "args": { + "External id": 249661,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918104175.637, "dur": 33.630, + "args": { + "External id": 249662,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918104178.224, "dur": 2.478, + "args": { + "External id": 249663,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918104211.999, "dur": 22.762, + "args": { + "External id": 249664,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918104236.216, "dur": 14.660, + "args": { + "External id": 249665,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918104253.099, "dur": 14.412, + "args": { + "External id": 249666,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918104269.010, "dur": 13.264, + "args": { + "External id": 249667,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918104284.141, "dur": 23.308, + "args": { + "External id": 249668,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918104286.250, "dur": 2.143, + "args": { + "External id": 249669,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918104290.634, "dur": 2.797, + "args": { + "External id": 249670,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918104309.094, "dur": 12.785, + "args": { + "External id": 249671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918104323.070, "dur": 11.112, + "args": { + "External id": 249672,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918104342.924, "dur": 1.947, + "args": { + "External id": 249673,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918104353.285, "dur": 4.142, + "args": { + "External id": 249674,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918104355.903, "dur": 0.474, + "args": { + "External id": 249675,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918104422.503, "dur": 44.532, + "args": { + "External id": 249676,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 8507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918104471.396, "dur": 5.670, + "args": { + "External id": 249677,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918104474.382, "dur": 1.492, + "args": { + "External id": 249678,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918104478.551, "dur": 22.290, + "args": { + "External id": 249679,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 8510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918104505.354, "dur": 6.827, + "args": { + "External id": 249680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918104507.229, "dur": 4.088, + "args": { + "External id": 249681,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918104509.600, "dur": 1.527, + "args": { + "External id": 249682,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918104514.657, "dur": 41.857, + "args": { + "External id": 249683,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918104515.784, "dur": 40.042, + "args": { + "External id": 249684,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918104560.021, "dur": 14.600, + "args": { + "External id": 249685,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 8516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918104580.043, "dur": 3.580, + "args": { + "External id": 249686,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918104581.884, "dur": 0.931, + "args": { + "External id": 249687,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "58720256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 4183438, "tid": 4183438, + "ts": 667918104587.378, "dur": 47.497, + "args": { + "External id": 249688,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [32000, 1024], []], "Ev Idx": 8519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918104588.725, "dur": 4.084, + "args": { + "External id": 249689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 8520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918104589.389, "dur": 2.872, + "args": { + "External id": 249690,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32000, 1024], [], []], "Ev Idx": 8521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918104591.150, "dur": 0.973, + "args": { + "External id": 249691,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32000]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32000, 1024], [], [], []], "Ev Idx": 8522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918104593.454, "dur": 41.038, + "args": { + "External id": 249692,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918104594.313, "dur": 39.545, + "args": { + "External id": 249693,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [1, 1024]], "Input Dims": [[8192, 1024], [1024, 32000]], "Ev Idx": 8524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918104638.469, "dur": 3.897, + "args": { + "External id": 249694,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918104640.365, "dur": 0.834, + "args": { + "External id": 249695,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918104647.123, "dur": 1.637, + "args": { + "External id": 249696,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 4183438, "tid": 4183438, + "ts": 667918104692.440, "dur": 8.133, + "args": { + "External id": 249697,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918104694.319, "dur": 5.699, + "args": { + "External id": 249698,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918104785.650, "dur": 184.605, + "args": { + "External id": 249699,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918104790.335, "dur": 2.398, + "args": { + "External id": 249700,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 4183438, "tid": 4183438, + "ts": 667918104794.150, "dur": 175.499, + "args": { + "External id": 249701,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 4183438, "tid": 4183438, + "ts": 667918104795.574, "dur": 0.860, + "args": { + "External id": 249702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 4183438, "tid": 4183438, + "ts": 667918104797.635, "dur": 23.162, + "args": { + "External id": 249703,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 4183438, "tid": 4183438, + "ts": 667918104822.374, "dur": 6.203, + "args": { + "External id": 249704,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918104827.007, "dur": 1.245, + "args": { + "External id": 249705,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918104829.601, "dur": 21.759, + "args": { + "External id": 249706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918104830.617, "dur": 1.520, + "args": { + "External id": 249707,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918104833.278, "dur": 17.739, + "args": { + "External id": 249708,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918104835.696, "dur": 2.402, + "args": { + "External id": 249709,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918104853.203, "dur": 20.808, + "args": { + "External id": 249710,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918104875.725, "dur": 12.150, + "args": { + "External id": 249711,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 4183438, "tid": 4183438, + "ts": 667918104890.119, "dur": 12.975, + "args": { + "External id": 249712,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 4183438, "tid": 4183438, + "ts": 667918104904.539, "dur": 11.494, + "args": { + "External id": 249713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918104917.789, "dur": 21.949, + "args": { + "External id": 249714,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918104922.211, "dur": 1.755, + "args": { + "External id": 249715,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918104926.566, "dur": 0.617, + "args": { + "External id": 249716,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 4183438, "tid": 4183438, + "ts": 667918104941.511, "dur": 14.601, + "args": { + "External id": 249717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918104957.109, "dur": 11.365, + "args": { + "External id": 249718,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918104976.316, "dur": 2.021, + "args": { + "External id": 249719,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918104985.942, "dur": 3.819, + "args": { + "External id": 249720,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918104988.235, "dur": 0.400, + "args": { + "External id": 249721,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918105048.637, "dur": 44.331, + "args": { + "External id": 249722,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [1024, 1]], "Input Dims": [[8192, 32000], [32000, 1024]], "Ev Idx": 8553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 4183438, "tid": 4183438, + "ts": 667918105097.514, "dur": 13.946, + "args": { + "External id": 249723,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], [], [], []], "Input Dims": [[65536, 1024], [], [], [], []], "Ev Idx": 8554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918105109.178, "dur": 0.806, + "args": { + "External id": 249724,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1024]", "[1024, 1]", "58720256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[65536, 1024], [], [], []], "Ev Idx": 8555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918105112.920, "dur": 22.601, + "args": { + "External id": 249725,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[8192, 1024], [8192, 1024], []], "Ev Idx": 8556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 4183438, "tid": 4183438, + "ts": 667918105140.520, "dur": 7.616, + "args": { + "External id": 249726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 4183438, "tid": 4183438, + "ts": 667918105142.356, "dur": 5.187, + "args": { + "External id": 249727,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918105144.512, "dur": 2.850, + "args": { + "External id": 249728,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 4183438, "tid": 4183438, + "ts": 667918105150.726, "dur": 59.794, + "args": { + "External id": 249729,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 4183438, "tid": 4183438, + "ts": 667918105152.014, "dur": 57.593, + "args": { + "External id": 249730,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [1024, 1]], "Input Dims": [[32000, 8192], [8192, 1024]], "Ev Idx": 8561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918105215.887, "dur": 16.606, + "args": { + "External id": 249731,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 8562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918105237.953, "dur": 24.575, + "args": { + "External id": 249732,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 8563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 4183438, "tid": 4183438, + "ts": 667918105240.622, "dur": 21.540, + "args": { + "External id": 249733,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918105246.146, "dur": 1.522, + "args": { + "External id": 249734,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918105268.174, "dur": 25.547, + "args": { + "External id": 249735,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], []], "Input Dims": [[32000, 1024], [], [], [], [], []], "Ev Idx": 8566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 4183438, "tid": 4183438, + "ts": 667918105270.523, "dur": 22.994, + "args": { + "External id": 249736,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1024, 1], [], [], [], [], [], []], "Input Dims": [[32000, 1024], [], [], [], [], [], []], "Ev Idx": 8567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918105275.660, "dur": 3.790, + "args": { + "External id": 249737,"Record function id": 0, "Concrete Inputs": ["[32000, 1024]", "[1024, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918105280.458, "dur": 12.556, + "args": { + "External id": 249738,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[1024, 1], [1024, 1], []], "Input Dims": [[32000, 1024], [32000, 1024], []], "Ev Idx": 8569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 4183438, + "ts": 667918105305.023, "dur": 5.941, + "args": { + "External id": 249739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 8570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 4183438, + "ts": 667918105307.617, "dur": 3.050, + "args": { + "External id": 249740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[65536, 1024]], "Ev Idx": 8571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 4183438, "tid": 4183438, + "ts": 667918105312.108, "dur": 1.725, + "args": { + "External id": 249741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 8572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 4183438, "tid": 4183438, + "ts": 667918105313.122, "dur": 0.631, + "args": { + "External id": 249742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32000, 1024]], "Ev Idx": 8573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918105356.376, "dur": 21.528, + "args": { + "External id": 249743,"Sequence number": 2987744, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 8574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 4183438, "tid": 4183438, + "ts": 667918105380.062, "dur": 12.250, + "args": { + "External id": 249744,"Sequence number": 2987745, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 8575 + } + }, + { + "ph": "s", "id": 2, "pid": 4183438, "tid": 4183438, "ts": 667918105380.062, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 4183438, "tid": 4183438, + "ts": 667918105496.684, "dur": 40.428, + "args": { + "External id": 249745,"Record function id": 0, "Ev Idx": 8576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 4183438, "tid": 4183438, + "ts": 667918105642.231, "dur": 100.983, + "args": { + "External id": 249746,"Sequence number": 2987746, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 8577 + } + }, + { + "ph": "s", "id": 1, "pid": 4183438, "tid": 4183438, "ts": 667918105642.231, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 4183438, "tid": 4183438, + "ts": 667918105809.677, "dur": 27.713, + "args": { + "External id": 249747,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 8578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918105811.363, "dur": 8.575, + "args": { + "External id": 249748,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 8579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918105814.944, "dur": 4.349, + "args": { + "External id": 249749,"Record function id": 0, "Concrete Inputs": ["[1]", "[1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918105821.589, "dur": 15.442, + "args": { + "External id": 249750,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 8581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 4183438, "tid": 4183438, + "ts": 667918555272.639, "dur": 46.518, + "args": { + "External id": 249751,"Sequence number": 2987747, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 4183438, "tid": 4183438, + "ts": 667918555326.340, "dur": 20.981, + "args": { + "External id": 249752,"Sequence number": 2987748, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 8583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 4183438, + "ts": 667918555426.014, "dur": 23.930, + "args": { + "External id": 249753,"Sequence number": 2987749, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 8584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 4183438, + "ts": 667918555907.428, "dur": 29.699, + "args": { + "External id": 249754,"Sequence number": 2987750, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 4183438, + "ts": 667918555943.728, "dur": 14.343, + "args": { + "External id": 249755,"Sequence number": 2987751, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 8586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 4183438, "tid": 4183438, + "ts": 667918557412.529, "dur": 2379.376, + "args": { + "External id": 249756,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 4183438, "tid": 4183438, + "ts": 667918557911.542, "dur": 655.784, + "args": { + "External id": 249757,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 4183438, "tid": 4183438, + "ts": 667918557930.168, "dur": 69.229, + "args": { + "External id": 249758,"Record function id": 0, "Concrete Inputs": ["[13797]", "6", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 8589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918557933.568, "dur": 15.699, + "args": { + "External id": 249759,"Record function id": 0, "Concrete Inputs": ["[13797]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 4183438, "tid": 4183438, + "ts": 667918557952.174, "dur": 46.942, + "args": { + "External id": 249760,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[13797]], "Ev Idx": 8591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 4183438, "tid": 4183438, + "ts": 667918557954.726, "dur": 43.727, + "args": { + "External id": 249761,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[13797], []], "Ev Idx": 8592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559819.599, "dur": 2.921, + "args": { + "External id": 249762,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559824.749, "dur": 0.303, + "args": { + "External id": 249763,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559826.231, "dur": 0.450, + "args": { + "External id": 249764,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559827.791, "dur": 0.538, + "args": { + "External id": 249765,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559829.378, "dur": 0.518, + "args": { + "External id": 249766,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559831.034, "dur": 0.428, + "args": { + "External id": 249767,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559832.659, "dur": 0.449, + "args": { + "External id": 249768,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559836.089, "dur": 0.405, + "args": { + "External id": 249769,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559837.501, "dur": 0.479, + "args": { + "External id": 249770,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559839.001, "dur": 0.444, + "args": { + "External id": 249771,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559840.541, "dur": 0.521, + "args": { + "External id": 249772,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559841.881, "dur": 0.331, + "args": { + "External id": 249773,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559843.024, "dur": 0.518, + "args": { + "External id": 249774,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559844.395, "dur": 0.527, + "args": { + "External id": 249775,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559845.741, "dur": 0.542, + "args": { + "External id": 249776,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559849.094, "dur": 0.221, + "args": { + "External id": 249777,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559850.077, "dur": 0.403, + "args": { + "External id": 249778,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559851.473, "dur": 0.410, + "args": { + "External id": 249779,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559853.002, "dur": 0.430, + "args": { + "External id": 249780,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559854.187, "dur": 0.416, + "args": { + "External id": 249781,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559858.946, "dur": 0.414, + "args": { + "External id": 249782,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559860.309, "dur": 0.329, + "args": { + "External id": 249783,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559861.566, "dur": 0.305, + "args": { + "External id": 249784,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559864.536, "dur": 0.230, + "args": { + "External id": 249785,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559865.572, "dur": 0.202, + "args": { + "External id": 249786,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559866.942, "dur": 0.240, + "args": { + "External id": 249787,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559868.045, "dur": 0.207, + "args": { + "External id": 249788,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559869.049, "dur": 0.205, + "args": { + "External id": 249789,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559870.089, "dur": 0.206, + "args": { + "External id": 249790,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559871.080, "dur": 0.211, + "args": { + "External id": 249791,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559872.087, "dur": 0.223, + "args": { + "External id": 249792,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559874.653, "dur": 0.225, + "args": { + "External id": 249793,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559875.656, "dur": 0.199, + "args": { + "External id": 249794,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559876.881, "dur": 0.191, + "args": { + "External id": 249795,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559877.842, "dur": 0.205, + "args": { + "External id": 249796,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559878.952, "dur": 0.198, + "args": { + "External id": 249797,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559879.978, "dur": 0.206, + "args": { + "External id": 249798,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559880.953, "dur": 0.218, + "args": { + "External id": 249799,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559882.018, "dur": 0.204, + "args": { + "External id": 249800,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559884.978, "dur": 0.203, + "args": { + "External id": 249801,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559886.156, "dur": 0.210, + "args": { + "External id": 249802,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559887.432, "dur": 0.204, + "args": { + "External id": 249803,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559888.653, "dur": 0.204, + "args": { + "External id": 249804,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559889.618, "dur": 0.332, + "args": { + "External id": 249805,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559890.711, "dur": 0.328, + "args": { + "External id": 249806,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559891.956, "dur": 0.208, + "args": { + "External id": 249807,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559892.936, "dur": 0.350, + "args": { + "External id": 249808,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559896.334, "dur": 0.385, + "args": { + "External id": 249809,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559897.504, "dur": 0.213, + "args": { + "External id": 249810,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559898.509, "dur": 0.203, + "args": { + "External id": 249811,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559899.539, "dur": 0.365, + "args": { + "External id": 249812,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559900.694, "dur": 0.368, + "args": { + "External id": 249813,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559901.911, "dur": 0.247, + "args": { + "External id": 249814,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559903.224, "dur": 0.195, + "args": { + "External id": 249815,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559904.176, "dur": 0.201, + "args": { + "External id": 249816,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559907.590, "dur": 0.199, + "args": { + "External id": 249817,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559908.870, "dur": 0.212, + "args": { + "External id": 249818,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559910.043, "dur": 0.196, + "args": { + "External id": 249819,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559910.990, "dur": 0.235, + "args": { + "External id": 249820,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559912.067, "dur": 0.230, + "args": { + "External id": 249821,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559913.201, "dur": 0.202, + "args": { + "External id": 249822,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559917.966, "dur": 0.203, + "args": { + "External id": 249823,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559918.981, "dur": 0.200, + "args": { + "External id": 249824,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559922.102, "dur": 0.202, + "args": { + "External id": 249825,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559923.384, "dur": 0.205, + "args": { + "External id": 249826,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559924.680, "dur": 0.229, + "args": { + "External id": 249827,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559925.849, "dur": 0.222, + "args": { + "External id": 249828,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559926.957, "dur": 0.197, + "args": { + "External id": 249829,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559928.096, "dur": 0.202, + "args": { + "External id": 249830,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559929.146, "dur": 0.204, + "args": { + "External id": 249831,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559930.095, "dur": 0.202, + "args": { + "External id": 249832,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559932.562, "dur": 0.202, + "args": { + "External id": 249833,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559933.588, "dur": 0.200, + "args": { + "External id": 249834,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559934.556, "dur": 0.197, + "args": { + "External id": 249835,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559935.608, "dur": 0.238, + "args": { + "External id": 249836,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559936.651, "dur": 0.227, + "args": { + "External id": 249837,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559937.630, "dur": 0.230, + "args": { + "External id": 249838,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559939.316, "dur": 0.224, + "args": { + "External id": 249839,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559940.327, "dur": 0.234, + "args": { + "External id": 249840,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559942.913, "dur": 0.247, + "args": { + "External id": 249841,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559943.966, "dur": 0.202, + "args": { + "External id": 249842,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559944.939, "dur": 0.199, + "args": { + "External id": 249843,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559945.891, "dur": 0.318, + "args": { + "External id": 249844,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559947.001, "dur": 0.199, + "args": { + "External id": 249845,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559947.967, "dur": 0.354, + "args": { + "External id": 249846,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559949.071, "dur": 0.335, + "args": { + "External id": 249847,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559950.197, "dur": 0.433, + "args": { + "External id": 249848,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559953.146, "dur": 0.361, + "args": { + "External id": 249849,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559954.415, "dur": 0.302, + "args": { + "External id": 249850,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559955.481, "dur": 0.196, + "args": { + "External id": 249851,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559956.465, "dur": 0.334, + "args": { + "External id": 249852,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559957.615, "dur": 0.305, + "args": { + "External id": 249853,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559958.673, "dur": 0.311, + "args": { + "External id": 249854,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559960.501, "dur": 0.223, + "args": { + "External id": 249855,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559961.553, "dur": 0.333, + "args": { + "External id": 249856,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918559999.492, "dur": 0.295, + "args": { + "External id": 254465,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560000.902, "dur": 0.197, + "args": { + "External id": 254466,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560001.884, "dur": 0.207, + "args": { + "External id": 254467,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560002.954, "dur": 0.254, + "args": { + "External id": 254468,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560004.028, "dur": 0.209, + "args": { + "External id": 254469,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560005.055, "dur": 0.233, + "args": { + "External id": 254470,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560006.147, "dur": 0.203, + "args": { + "External id": 254471,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560007.283, "dur": 0.196, + "args": { + "External id": 254472,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560010.263, "dur": 0.206, + "args": { + "External id": 254473,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560011.284, "dur": 0.199, + "args": { + "External id": 254474,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560012.268, "dur": 0.203, + "args": { + "External id": 254475,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560013.231, "dur": 0.201, + "args": { + "External id": 254476,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560014.203, "dur": 0.207, + "args": { + "External id": 254477,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560015.211, "dur": 0.228, + "args": { + "External id": 254478,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560016.948, "dur": 0.208, + "args": { + "External id": 254479,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560017.948, "dur": 0.207, + "args": { + "External id": 254480,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560020.125, "dur": 0.212, + "args": { + "External id": 254481,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560021.151, "dur": 0.211, + "args": { + "External id": 254482,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560022.111, "dur": 0.234, + "args": { + "External id": 254483,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560023.126, "dur": 0.197, + "args": { + "External id": 254484,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560024.108, "dur": 0.209, + "args": { + "External id": 254485,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560025.065, "dur": 0.198, + "args": { + "External id": 254486,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560026.068, "dur": 0.205, + "args": { + "External id": 254487,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560030.302, "dur": 0.245, + "args": { + "External id": 254488,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560033.062, "dur": 0.196, + "args": { + "External id": 254489,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560034.008, "dur": 0.204, + "args": { + "External id": 254490,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560034.979, "dur": 0.211, + "args": { + "External id": 254491,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560035.970, "dur": 0.218, + "args": { + "External id": 254492,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560037.001, "dur": 0.202, + "args": { + "External id": 254493,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560037.991, "dur": 0.205, + "args": { + "External id": 254494,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560038.968, "dur": 0.215, + "args": { + "External id": 254495,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560039.969, "dur": 0.205, + "args": { + "External id": 254496,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560042.113, "dur": 0.199, + "args": { + "External id": 254497,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560043.136, "dur": 0.207, + "args": { + "External id": 254498,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560044.687, "dur": 0.199, + "args": { + "External id": 254499,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560045.654, "dur": 0.210, + "args": { + "External id": 254500,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560046.619, "dur": 0.201, + "args": { + "External id": 254501,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560047.572, "dur": 0.235, + "args": { + "External id": 254502,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560048.585, "dur": 0.200, + "args": { + "External id": 254503,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560049.538, "dur": 0.209, + "args": { + "External id": 254504,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560051.704, "dur": 0.197, + "args": { + "External id": 254505,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560052.650, "dur": 0.204, + "args": { + "External id": 254506,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560053.639, "dur": 0.199, + "args": { + "External id": 254507,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560054.656, "dur": 0.214, + "args": { + "External id": 254508,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560055.640, "dur": 0.236, + "args": { + "External id": 254509,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560056.714, "dur": 0.216, + "args": { + "External id": 254510,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560057.683, "dur": 0.202, + "args": { + "External id": 254511,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560058.681, "dur": 0.206, + "args": { + "External id": 254512,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560061.578, "dur": 0.206, + "args": { + "External id": 254513,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560062.633, "dur": 0.212, + "args": { + "External id": 254514,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560063.610, "dur": 0.201, + "args": { + "External id": 254515,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560064.686, "dur": 0.240, + "args": { + "External id": 254516,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560065.710, "dur": 0.200, + "args": { + "External id": 254517,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560066.677, "dur": 0.206, + "args": { + "External id": 254518,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560068.031, "dur": 0.200, + "args": { + "External id": 254519,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560069.036, "dur": 0.212, + "args": { + "External id": 254520,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560071.518, "dur": 0.205, + "args": { + "External id": 254521,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560072.479, "dur": 0.205, + "args": { + "External id": 254522,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560073.480, "dur": 0.205, + "args": { + "External id": 254523,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560074.462, "dur": 0.235, + "args": { + "External id": 254524,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560075.523, "dur": 0.206, + "args": { + "External id": 254525,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560076.480, "dur": 0.209, + "args": { + "External id": 254526,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560077.468, "dur": 0.200, + "args": { + "External id": 254527,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560078.425, "dur": 0.205, + "args": { + "External id": 254528,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560081.274, "dur": 0.201, + "args": { + "External id": 254529,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560082.227, "dur": 0.216, + "args": { + "External id": 254530,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560084.212, "dur": 0.214, + "args": { + "External id": 254531,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560085.223, "dur": 0.227, + "args": { + "External id": 254532,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560086.356, "dur": 0.245, + "args": { + "External id": 254533,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560087.404, "dur": 0.212, + "args": { + "External id": 254534,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560088.443, "dur": 0.197, + "args": { + "External id": 254535,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560089.434, "dur": 0.203, + "args": { + "External id": 254536,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560092.171, "dur": 0.315, + "args": { + "External id": 254537,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560093.324, "dur": 0.204, + "args": { + "External id": 254538,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560094.309, "dur": 0.198, + "args": { + "External id": 254539,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560095.278, "dur": 0.221, + "args": { + "External id": 254540,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560096.261, "dur": 0.199, + "args": { + "External id": 254541,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560097.277, "dur": 0.205, + "args": { + "External id": 254542,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560098.519, "dur": 0.199, + "args": { + "External id": 254543,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560099.470, "dur": 0.202, + "args": { + "External id": 254544,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560102.120, "dur": 0.199, + "args": { + "External id": 254545,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560103.065, "dur": 0.204, + "args": { + "External id": 254546,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560104.091, "dur": 0.229, + "args": { + "External id": 254547,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560105.103, "dur": 0.201, + "args": { + "External id": 254548,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560106.051, "dur": 0.203, + "args": { + "External id": 254549,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560107.076, "dur": 0.200, + "args": { + "External id": 254550,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560108.058, "dur": 0.199, + "args": { + "External id": 254551,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560109.043, "dur": 0.201, + "args": { + "External id": 254552,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560111.868, "dur": 0.202, + "args": { + "External id": 254553,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560112.880, "dur": 0.234, + "args": { + "External id": 254554,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560113.890, "dur": 0.195, + "args": { + "External id": 254555,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560114.866, "dur": 0.201, + "args": { + "External id": 254556,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560115.818, "dur": 0.198, + "args": { + "External id": 254557,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560116.843, "dur": 0.216, + "args": { + "External id": 254558,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560117.941, "dur": 0.205, + "args": { + "External id": 254559,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560118.987, "dur": 0.221, + "args": { + "External id": 254560,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560121.074, "dur": 0.198, + "args": { + "External id": 254561,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560122.109, "dur": 0.208, + "args": { + "External id": 254562,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560123.157, "dur": 0.198, + "args": { + "External id": 254563,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560124.179, "dur": 0.210, + "args": { + "External id": 254564,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560125.149, "dur": 0.198, + "args": { + "External id": 254565,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560126.090, "dur": 0.201, + "args": { + "External id": 254566,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560127.067, "dur": 0.234, + "args": { + "External id": 254567,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560128.044, "dur": 0.202, + "args": { + "External id": 254568,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560130.826, "dur": 0.202, + "args": { + "External id": 254569,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560131.924, "dur": 0.212, + "args": { + "External id": 254570,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560133.318, "dur": 0.211, + "args": { + "External id": 254571,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560134.320, "dur": 0.229, + "args": { + "External id": 254572,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560142.321, "dur": 0.221, + "args": { + "External id": 254573,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560143.465, "dur": 0.203, + "args": { + "External id": 254574,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560144.412, "dur": 0.230, + "args": { + "External id": 254575,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560145.474, "dur": 0.201, + "args": { + "External id": 254576,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560148.003, "dur": 0.205, + "args": { + "External id": 254577,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560148.978, "dur": 0.197, + "args": { + "External id": 254578,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560149.975, "dur": 0.201, + "args": { + "External id": 254579,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560150.945, "dur": 0.200, + "args": { + "External id": 254580,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560151.911, "dur": 0.208, + "args": { + "External id": 254581,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560152.940, "dur": 0.216, + "args": { + "External id": 254582,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560153.904, "dur": 0.202, + "args": { + "External id": 254583,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560154.858, "dur": 0.196, + "args": { + "External id": 254584,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560157.713, "dur": 0.209, + "args": { + "External id": 254585,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560158.673, "dur": 0.196, + "args": { + "External id": 254586,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560159.657, "dur": 0.206, + "args": { + "External id": 254587,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918560160.677, "dur": 0.198, + "args": { + "External id": 254588,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 4183438, "tid": 4183438, + "ts": 667918560220.267, "dur": 1150.526, + "args": { + "External id": 254589,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 4183438, "tid": 4183438, + "ts": 667918560566.167, "dur": 738.190, + "args": { + "External id": 254590,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560572.139, "dur": 7.314, + "args": { + "External id": 254591,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560575.999, "dur": 2.707, + "args": { + "External id": 254592,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560580.085, "dur": 4.270, + "args": { + "External id": 254593,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560581.118, "dur": 2.927, + "args": { + "External id": 254594,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560584.818, "dur": 2.843, + "args": { + "External id": 254595,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560587.232, "dur": 0.364, + "args": { + "External id": 254596,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560587.948, "dur": 1.792, + "args": { + "External id": 254597,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560588.367, "dur": 1.306, + "args": { + "External id": 254598,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560591.595, "dur": 5.109, + "args": { + "External id": 254599,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560596.005, "dur": 0.626, + "args": { + "External id": 254600,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560597.030, "dur": 0.964, + "args": { + "External id": 254601,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560597.432, "dur": 0.497, + "args": { + "External id": 254602,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560598.374, "dur": 1.193, + "args": { + "External id": 254603,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560598.746, "dur": 0.752, + "args": { + "External id": 254604,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560599.883, "dur": 4.768, + "args": { + "External id": 254605,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560604.031, "dur": 0.555, + "args": { + "External id": 254606,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560604.881, "dur": 1.722, + "args": { + "External id": 254607,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560605.555, "dur": 0.982, + "args": { + "External id": 254608,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560606.829, "dur": 2.794, + "args": { + "External id": 254609,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560607.199, "dur": 2.353, + "args": { + "External id": 254610,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560609.878, "dur": 2.801, + "args": { + "External id": 254611,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560612.113, "dur": 0.501, + "args": { + "External id": 254612,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560612.907, "dur": 1.163, + "args": { + "External id": 254613,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560613.307, "dur": 0.696, + "args": { + "External id": 254614,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560615.711, "dur": 5.141, + "args": { + "External id": 254615,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560620.049, "dur": 0.739, + "args": { + "External id": 254616,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560621.129, "dur": 1.205, + "args": { + "External id": 254617,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560621.482, "dur": 0.786, + "args": { + "External id": 254618,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560622.591, "dur": 1.347, + "args": { + "External id": 254619,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560622.956, "dur": 0.914, + "args": { + "External id": 254620,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560624.237, "dur": 3.271, + "args": { + "External id": 254621,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560626.618, "dur": 0.827, + "args": { + "External id": 254622,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560627.730, "dur": 1.088, + "args": { + "External id": 254623,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560628.129, "dur": 0.626, + "args": { + "External id": 254624,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560629.042, "dur": 2.813, + "args": { + "External id": 254625,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560629.415, "dur": 2.371, + "args": { + "External id": 254626,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560632.083, "dur": 3.153, + "args": { + "External id": 254627,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560634.534, "dur": 0.640, + "args": { + "External id": 254628,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560635.711, "dur": 1.046, + "args": { + "External id": 254629,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560636.065, "dur": 0.623, + "args": { + "External id": 254630,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560638.446, "dur": 4.935, + "args": { + "External id": 254631,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560642.657, "dur": 0.654, + "args": { + "External id": 254632,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560643.634, "dur": 1.281, + "args": { + "External id": 254633,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560643.994, "dur": 0.855, + "args": { + "External id": 254634,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560645.349, "dur": 1.245, + "args": { + "External id": 254635,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560645.760, "dur": 0.768, + "args": { + "External id": 254636,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560646.947, "dur": 3.340, + "args": { + "External id": 254637,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560649.644, "dur": 0.581, + "args": { + "External id": 254638,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560650.510, "dur": 0.838, + "args": { + "External id": 254639,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560650.866, "dur": 0.417, + "args": { + "External id": 254640,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560651.667, "dur": 28.507, + "args": { + "External id": 254641,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560652.035, "dur": 27.261, + "args": { + "External id": 254642,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560681.475, "dur": 3.492, + "args": { + "External id": 254643,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560684.047, "dur": 0.615, + "args": { + "External id": 254644,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560685.232, "dur": 1.261, + "args": { + "External id": 254645,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560685.598, "dur": 0.818, + "args": { + "External id": 254646,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560687.355, "dur": 4.804, + "args": { + "External id": 254647,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560691.678, "dur": 0.417, + "args": { + "External id": 254648,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560692.420, "dur": 1.369, + "args": { + "External id": 254649,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560692.794, "dur": 0.932, + "args": { + "External id": 254650,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560694.017, "dur": 1.079, + "args": { + "External id": 254651,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560694.379, "dur": 0.647, + "args": { + "External id": 254652,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560695.323, "dur": 3.107, + "args": { + "External id": 254653,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560697.757, "dur": 0.607, + "args": { + "External id": 254654,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560698.657, "dur": 1.340, + "args": { + "External id": 254655,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560699.080, "dur": 0.742, + "args": { + "External id": 254656,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560700.232, "dur": 2.897, + "args": { + "External id": 254657,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560700.644, "dur": 2.313, + "args": { + "External id": 254658,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560703.373, "dur": 2.656, + "args": { + "External id": 254659,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560705.244, "dur": 0.534, + "args": { + "External id": 254660,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560706.262, "dur": 1.171, + "args": { + "External id": 254661,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560706.617, "dur": 0.661, + "args": { + "External id": 254662,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560707.922, "dur": 5.234, + "args": { + "External id": 254663,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560712.335, "dur": 0.755, + "args": { + "External id": 254664,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560713.475, "dur": 1.049, + "args": { + "External id": 254665,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560713.845, "dur": 0.615, + "args": { + "External id": 254666,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560714.982, "dur": 1.421, + "args": { + "External id": 254667,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560715.350, "dur": 0.983, + "args": { + "External id": 254668,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560716.717, "dur": 3.602, + "args": { + "External id": 254669,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560719.381, "dur": 0.874, + "args": { + "External id": 254670,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560720.574, "dur": 0.990, + "args": { + "External id": 254671,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560720.924, "dur": 0.491, + "args": { + "External id": 254672,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560721.813, "dur": 2.843, + "args": { + "External id": 254673,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560722.166, "dur": 2.416, + "args": { + "External id": 254674,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560724.913, "dur": 1.300, + "args": { + "External id": 254675,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560725.584, "dur": 0.565, + "args": { + "External id": 254676,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560726.450, "dur": 1.478, + "args": { + "External id": 254677,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560726.799, "dur": 1.056, + "args": { + "External id": 254678,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560728.166, "dur": 3.698, + "args": { + "External id": 254679,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560731.046, "dur": 0.749, + "args": { + "External id": 254680,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560732.156, "dur": 1.154, + "args": { + "External id": 254681,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560732.525, "dur": 0.531, + "args": { + "External id": 254682,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560733.580, "dur": 1.584, + "args": { + "External id": 254683,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560733.921, "dur": 1.176, + "args": { + "External id": 254684,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560735.528, "dur": 3.314, + "args": { + "External id": 254685,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560738.044, "dur": 0.635, + "args": { + "External id": 254686,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560739.181, "dur": 1.253, + "args": { + "External id": 254687,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560739.555, "dur": 0.720, + "args": { + "External id": 254688,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560740.906, "dur": 2.626, + "args": { + "External id": 254689,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560741.251, "dur": 2.212, + "args": { + "External id": 254690,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560743.777, "dur": 1.533, + "args": { + "External id": 254691,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560744.748, "dur": 0.500, + "args": { + "External id": 254692,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560745.752, "dur": 1.072, + "args": { + "External id": 254693,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560746.088, "dur": 0.668, + "args": { + "External id": 254694,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560747.066, "dur": 3.218, + "args": { + "External id": 254695,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560749.557, "dur": 0.658, + "args": { + "External id": 254696,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560750.584, "dur": 1.035, + "args": { + "External id": 254697,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560750.926, "dur": 0.631, + "args": { + "External id": 254698,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560751.859, "dur": 1.347, + "args": { + "External id": 254699,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560752.217, "dur": 0.729, + "args": { + "External id": 254700,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560753.457, "dur": 3.238, + "args": { + "External id": 254701,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560755.914, "dur": 0.618, + "args": { + "External id": 254702,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560756.938, "dur": 1.035, + "args": { + "External id": 254703,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560757.290, "dur": 0.614, + "args": { + "External id": 254704,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560758.210, "dur": 2.614, + "args": { + "External id": 254705,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560758.551, "dur": 1.999, + "args": { + "External id": 254706,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560761.064, "dur": 1.277, + "args": { + "External id": 254707,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560761.557, "dur": 0.717, + "args": { + "External id": 254708,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560762.586, "dur": 1.440, + "args": { + "External id": 254709,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560762.944, "dur": 1.015, + "args": { + "External id": 254710,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560764.379, "dur": 3.395, + "args": { + "External id": 254711,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560767.112, "dur": 0.596, + "args": { + "External id": 254712,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560768.189, "dur": 1.050, + "args": { + "External id": 254713,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560768.559, "dur": 0.617, + "args": { + "External id": 254714,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560769.479, "dur": 1.277, + "args": { + "External id": 254715,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560769.837, "dur": 0.853, + "args": { + "External id": 254716,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560770.999, "dur": 2.945, + "args": { + "External id": 254717,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560773.382, "dur": 0.500, + "args": { + "External id": 254718,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560774.182, "dur": 1.006, + "args": { + "External id": 254719,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560774.541, "dur": 0.585, + "args": { + "External id": 254720,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560775.428, "dur": 2.491, + "args": { + "External id": 254721,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560775.770, "dur": 2.078, + "args": { + "External id": 254722,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560778.239, "dur": 1.727, + "args": { + "External id": 254723,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560778.772, "dur": 1.128, + "args": { + "External id": 254724,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560780.402, "dur": 1.297, + "args": { + "External id": 254725,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560780.739, "dur": 0.893, + "args": { + "External id": 254726,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560781.940, "dur": 3.952, + "args": { + "External id": 254727,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560784.887, "dur": 0.940, + "args": { + "External id": 254728,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560786.164, "dur": 1.108, + "args": { + "External id": 254729,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560786.525, "dur": 0.475, + "args": { + "External id": 254730,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560787.514, "dur": 0.985, + "args": { + "External id": 254731,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560787.904, "dur": 0.528, + "args": { + "External id": 254732,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560788.768, "dur": 3.354, + "args": { + "External id": 254733,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560791.303, "dur": 0.756, + "args": { + "External id": 254734,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560792.360, "dur": 1.197, + "args": { + "External id": 254735,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560792.705, "dur": 0.789, + "args": { + "External id": 254736,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560793.798, "dur": 2.878, + "args": { + "External id": 254737,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560794.153, "dur": 2.257, + "args": { + "External id": 254738,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560796.927, "dur": 1.299, + "args": { + "External id": 254739,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560797.533, "dur": 0.627, + "args": { + "External id": 254740,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560798.578, "dur": 1.314, + "args": { + "External id": 254741,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560798.962, "dur": 0.864, + "args": { + "External id": 254742,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560800.134, "dur": 3.670, + "args": { + "External id": 254743,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560803.050, "dur": 0.686, + "args": { + "External id": 254744,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560804.075, "dur": 1.083, + "args": { + "External id": 254745,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560804.478, "dur": 0.619, + "args": { + "External id": 254746,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560805.398, "dur": 1.054, + "args": { + "External id": 254747,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560805.735, "dur": 0.650, + "args": { + "External id": 254748,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560806.703, "dur": 3.018, + "args": { + "External id": 254749,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560809.050, "dur": 0.607, + "args": { + "External id": 254750,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560810.021, "dur": 1.277, + "args": { + "External id": 254751,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560810.408, "dur": 0.624, + "args": { + "External id": 254752,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560811.539, "dur": 2.647, + "args": { + "External id": 254753,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560811.884, "dur": 2.230, + "args": { + "External id": 254754,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560814.430, "dur": 1.697, + "args": { + "External id": 254755,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560815.114, "dur": 0.948, + "args": { + "External id": 254756,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560816.518, "dur": 1.125, + "args": { + "External id": 254757,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560816.875, "dur": 0.702, + "args": { + "External id": 254758,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560817.970, "dur": 3.473, + "args": { + "External id": 254759,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560820.896, "dur": 0.481, + "args": { + "External id": 254760,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560821.708, "dur": 1.713, + "args": { + "External id": 254761,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560822.049, "dur": 1.034, + "args": { + "External id": 254762,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560823.667, "dur": 1.433, + "args": { + "External id": 254763,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560824.051, "dur": 0.854, + "args": { + "External id": 254764,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560825.344, "dur": 3.172, + "args": { + "External id": 254765,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560827.798, "dur": 0.650, + "args": { + "External id": 254766,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560828.763, "dur": 1.243, + "args": { + "External id": 254767,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560829.118, "dur": 0.631, + "args": { + "External id": 254768,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560830.259, "dur": 2.347, + "args": { + "External id": 254769,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560830.622, "dur": 1.912, + "args": { + "External id": 254770,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560832.882, "dur": 1.330, + "args": { + "External id": 254771,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560833.549, "dur": 0.493, + "args": { + "External id": 254772,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560834.451, "dur": 1.362, + "args": { + "External id": 254773,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560834.840, "dur": 0.905, + "args": { + "External id": 254774,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560836.081, "dur": 3.790, + "args": { + "External id": 254775,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560839.043, "dur": 0.765, + "args": { + "External id": 254776,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560840.138, "dur": 1.337, + "args": { + "External id": 254777,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560840.483, "dur": 0.740, + "args": { + "External id": 254778,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560841.728, "dur": 1.220, + "args": { + "External id": 254779,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560842.110, "dur": 0.771, + "args": { + "External id": 254780,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560843.214, "dur": 3.251, + "args": { + "External id": 254781,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560845.601, "dur": 0.800, + "args": { + "External id": 254782,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560846.708, "dur": 0.884, + "args": { + "External id": 254783,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560847.059, "dur": 0.463, + "args": { + "External id": 254784,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560847.850, "dur": 2.858, + "args": { + "External id": 254785,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560848.214, "dur": 2.237, + "args": { + "External id": 254786,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560850.985, "dur": 1.435, + "args": { + "External id": 254787,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560851.709, "dur": 0.645, + "args": { + "External id": 254788,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560852.665, "dur": 0.975, + "args": { + "External id": 254789,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560853.015, "dur": 0.559, + "args": { + "External id": 254790,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560853.878, "dur": 3.407, + "args": { + "External id": 254791,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560856.657, "dur": 0.556, + "args": { + "External id": 254792,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560857.576, "dur": 1.016, + "args": { + "External id": 254793,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560857.929, "dur": 0.600, + "args": { + "External id": 254794,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560858.883, "dur": 1.582, + "args": { + "External id": 254795,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560859.233, "dur": 0.971, + "args": { + "External id": 254796,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560860.718, "dur": 3.846, + "args": { + "External id": 254797,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560863.535, "dur": 0.963, + "args": { + "External id": 254798,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560864.802, "dur": 1.083, + "args": { + "External id": 254799,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560865.158, "dur": 0.661, + "args": { + "External id": 254800,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560866.160, "dur": 2.706, + "args": { + "External id": 254801,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560866.518, "dur": 2.188, + "args": { + "External id": 254802,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560869.114, "dur": 1.460, + "args": { + "External id": 254803,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560869.836, "dur": 0.670, + "args": { + "External id": 254804,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560870.813, "dur": 1.125, + "args": { + "External id": 254805,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560871.160, "dur": 0.709, + "args": { + "External id": 254806,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560872.180, "dur": 4.117, + "args": { + "External id": 254807,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560874.911, "dur": 1.320, + "args": { + "External id": 254808,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560876.603, "dur": 1.046, + "args": { + "External id": 254809,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560876.953, "dur": 0.633, + "args": { + "External id": 254810,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560877.892, "dur": 1.158, + "args": { + "External id": 254811,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560878.251, "dur": 0.730, + "args": { + "External id": 254812,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560879.296, "dur": 3.462, + "args": { + "External id": 254813,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560882.105, "dur": 0.485, + "args": { + "External id": 254814,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560883.103, "dur": 0.922, + "args": { + "External id": 254815,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560883.485, "dur": 0.474, + "args": { + "External id": 254816,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560884.270, "dur": 2.836, + "args": { + "External id": 254817,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560884.630, "dur": 2.406, + "args": { + "External id": 254818,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560887.360, "dur": 1.516, + "args": { + "External id": 254819,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560888.129, "dur": 0.479, + "args": { + "External id": 254820,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560889.171, "dur": 1.124, + "args": { + "External id": 254821,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560889.530, "dur": 0.696, + "args": { + "External id": 254822,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560890.537, "dur": 3.007, + "args": { + "External id": 254823,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560892.924, "dur": 0.555, + "args": { + "External id": 254824,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560893.818, "dur": 1.043, + "args": { + "External id": 254825,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560894.167, "dur": 0.631, + "args": { + "External id": 254826,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560895.131, "dur": 1.441, + "args": { + "External id": 254827,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560895.473, "dur": 1.032, + "args": { + "External id": 254828,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560896.831, "dur": 3.075, + "args": { + "External id": 254829,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560899.244, "dur": 0.600, + "args": { + "External id": 254830,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560900.146, "dur": 1.218, + "args": { + "External id": 254831,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560900.503, "dur": 0.794, + "args": { + "External id": 254832,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560901.824, "dur": 2.688, + "args": { + "External id": 254833,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560902.186, "dur": 2.253, + "args": { + "External id": 254834,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560904.771, "dur": 1.329, + "args": { + "External id": 254835,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560905.328, "dur": 0.708, + "args": { + "External id": 254836,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560906.343, "dur": 1.145, + "args": { + "External id": 254837,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560906.692, "dur": 0.728, + "args": { + "External id": 254838,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560907.736, "dur": 3.877, + "args": { + "External id": 254839,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560910.551, "dur": 0.997, + "args": { + "External id": 254840,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560911.918, "dur": 1.041, + "args": { + "External id": 254841,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560912.280, "dur": 0.611, + "args": { + "External id": 254842,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560913.231, "dur": 0.920, + "args": { + "External id": 254843,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560913.590, "dur": 0.494, + "args": { + "External id": 254844,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560914.603, "dur": 3.094, + "args": { + "External id": 254845,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560917.009, "dur": 0.624, + "args": { + "External id": 254846,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560918.096, "dur": 1.000, + "args": { + "External id": 254847,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560918.463, "dur": 0.571, + "args": { + "External id": 254848,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560919.533, "dur": 2.716, + "args": { + "External id": 254849,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560919.922, "dur": 2.254, + "args": { + "External id": 254850,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560922.495, "dur": 1.241, + "args": { + "External id": 254851,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560923.098, "dur": 0.572, + "args": { + "External id": 254852,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560924.100, "dur": 1.246, + "args": { + "External id": 254853,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560924.460, "dur": 0.819, + "args": { + "External id": 254854,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560925.584, "dur": 3.291, + "args": { + "External id": 254855,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560927.948, "dur": 0.863, + "args": { + "External id": 254856,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560929.364, "dur": 1.208, + "args": { + "External id": 254857,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560929.937, "dur": 0.572, + "args": { + "External id": 254858,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560931.081, "dur": 1.358, + "args": { + "External id": 254859,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560931.747, "dur": 0.626, + "args": { + "External id": 254860,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560932.863, "dur": 4.976, + "args": { + "External id": 254861,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560937.062, "dur": 0.715, + "args": { + "External id": 254862,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560938.385, "dur": 1.548, + "args": { + "External id": 254863,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560938.928, "dur": 0.943, + "args": { + "External id": 254864,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560940.195, "dur": 2.512, + "args": { + "External id": 254865,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560940.615, "dur": 1.933, + "args": { + "External id": 254866,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560943.010, "dur": 1.112, + "args": { + "External id": 254867,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560943.444, "dur": 0.614, + "args": { + "External id": 254868,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560944.380, "dur": 1.308, + "args": { + "External id": 254869,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560944.849, "dur": 0.771, + "args": { + "External id": 254870,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560946.158, "dur": 3.369, + "args": { + "External id": 254871,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560948.701, "dur": 0.763, + "args": { + "External id": 254872,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560949.822, "dur": 0.829, + "args": { + "External id": 254873,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560950.252, "dur": 0.333, + "args": { + "External id": 254874,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560950.943, "dur": 1.224, + "args": { + "External id": 254875,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560951.402, "dur": 0.698, + "args": { + "External id": 254876,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560952.428, "dur": 3.240, + "args": { + "External id": 254877,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560954.907, "dur": 0.696, + "args": { + "External id": 254878,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560956.248, "dur": 1.585, + "args": { + "External id": 254879,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560956.748, "dur": 1.021, + "args": { + "External id": 254880,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560958.095, "dur": 2.656, + "args": { + "External id": 254881,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560958.497, "dur": 2.183, + "args": { + "External id": 254882,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560961.018, "dur": 1.147, + "args": { + "External id": 254883,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560961.473, "dur": 0.628, + "args": { + "External id": 254884,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560962.718, "dur": 1.312, + "args": { + "External id": 254885,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560963.186, "dur": 0.778, + "args": { + "External id": 254886,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560964.324, "dur": 3.074, + "args": { + "External id": 254887,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560966.774, "dur": 0.561, + "args": { + "External id": 254888,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560967.681, "dur": 1.112, + "args": { + "External id": 254889,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560968.085, "dur": 0.644, + "args": { + "External id": 254890,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560969.246, "dur": 1.144, + "args": { + "External id": 254891,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560969.674, "dur": 0.648, + "args": { + "External id": 254892,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560970.658, "dur": 3.152, + "args": { + "External id": 254893,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560973.089, "dur": 0.655, + "args": { + "External id": 254894,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560974.088, "dur": 0.918, + "args": { + "External id": 254895,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560974.499, "dur": 0.444, + "args": { + "External id": 254896,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560975.295, "dur": 2.251, + "args": { + "External id": 254897,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560975.755, "dur": 1.722, + "args": { + "External id": 254898,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560977.824, "dur": 1.382, + "args": { + "External id": 254899,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560978.228, "dur": 0.912, + "args": { + "External id": 254900,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560979.561, "dur": 1.183, + "args": { + "External id": 254901,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560979.978, "dur": 0.699, + "args": { + "External id": 254902,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560981.004, "dur": 3.269, + "args": { + "External id": 254903,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560983.216, "dur": 0.996, + "args": { + "External id": 254904,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560984.560, "dur": 1.073, + "args": { + "External id": 254905,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560984.996, "dur": 0.572, + "args": { + "External id": 254906,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560985.944, "dur": 1.140, + "args": { + "External id": 254907,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560986.363, "dur": 0.653, + "args": { + "External id": 254908,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560987.486, "dur": 2.976, + "args": { + "External id": 254909,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560989.931, "dur": 0.464, + "args": { + "External id": 254910,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560990.744, "dur": 1.077, + "args": { + "External id": 254911,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560991.150, "dur": 0.608, + "args": { + "External id": 254912,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560992.171, "dur": 2.687, + "args": { + "External id": 254913,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560992.577, "dur": 2.208, + "args": { + "External id": 254914,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560995.125, "dur": 1.217, + "args": { + "External id": 254915,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560995.540, "dur": 0.740, + "args": { + "External id": 254916,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560996.608, "dur": 1.023, + "args": { + "External id": 254917,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918560997.034, "dur": 0.532, + "args": { + "External id": 254918,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918560997.895, "dur": 2.898, + "args": { + "External id": 254919,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561000.257, "dur": 0.475, + "args": { + "External id": 254920,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561001.084, "dur": 0.937, + "args": { + "External id": 254921,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561001.537, "dur": 0.418, + "args": { + "External id": 254922,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561002.290, "dur": 1.127, + "args": { + "External id": 254923,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561002.709, "dur": 0.640, + "args": { + "External id": 254924,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561003.676, "dur": 2.879, + "args": { + "External id": 254925,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561006.149, "dur": 0.337, + "args": { + "External id": 254926,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561007.004, "dur": 1.507, + "args": { + "External id": 254927,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561007.415, "dur": 1.035, + "args": { + "External id": 254928,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561008.861, "dur": 2.502, + "args": { + "External id": 254929,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561009.283, "dur": 2.012, + "args": { + "External id": 254930,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561011.639, "dur": 1.163, + "args": { + "External id": 254931,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561012.036, "dur": 0.703, + "args": { + "External id": 254932,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561013.054, "dur": 1.035, + "args": { + "External id": 254933,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561013.475, "dur": 0.547, + "args": { + "External id": 254934,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561014.352, "dur": 2.911, + "args": { + "External id": 254935,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561016.759, "dur": 0.438, + "args": { + "External id": 254936,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561017.543, "dur": 0.916, + "args": { + "External id": 254937,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561017.956, "dur": 0.441, + "args": { + "External id": 254938,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561018.952, "dur": 1.119, + "args": { + "External id": 254939,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561019.395, "dur": 0.609, + "args": { + "External id": 254940,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561020.348, "dur": 3.028, + "args": { + "External id": 254941,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561022.737, "dur": 0.573, + "args": { + "External id": 254942,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561023.622, "dur": 1.076, + "args": { + "External id": 254943,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561024.025, "dur": 0.608, + "args": { + "External id": 254944,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561025.179, "dur": 2.386, + "args": { + "External id": 254945,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561025.589, "dur": 1.907, + "args": { + "External id": 254946,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561027.822, "dur": 1.083, + "args": { + "External id": 254947,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561028.244, "dur": 0.597, + "args": { + "External id": 254948,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561029.262, "dur": 1.253, + "args": { + "External id": 254949,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561029.689, "dur": 0.756, + "args": { + "External id": 254950,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561030.779, "dur": 2.963, + "args": { + "External id": 254951,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561033.142, "dur": 0.535, + "args": { + "External id": 254952,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561034.182, "dur": 1.500, + "args": { + "External id": 254953,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561034.602, "dur": 1.017, + "args": { + "External id": 254954,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561035.942, "dur": 1.134, + "args": { + "External id": 254955,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561036.348, "dur": 0.659, + "args": { + "External id": 254956,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561037.345, "dur": 2.627, + "args": { + "External id": 254957,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561039.639, "dur": 0.266, + "args": { + "External id": 254958,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561040.315, "dur": 1.006, + "args": { + "External id": 254959,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561040.708, "dur": 0.553, + "args": { + "External id": 254960,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561041.706, "dur": 2.805, + "args": { + "External id": 254961,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561042.111, "dur": 2.232, + "args": { + "External id": 254962,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561044.873, "dur": 1.835, + "args": { + "External id": 254963,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561045.486, "dur": 1.157, + "args": { + "External id": 254964,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561047.119, "dur": 1.446, + "args": { + "External id": 254965,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561047.543, "dur": 0.956, + "args": { + "External id": 254966,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561049.022, "dur": 3.015, + "args": { + "External id": 254967,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561051.319, "dur": 0.653, + "args": { + "External id": 254968,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561052.553, "dur": 0.988, + "args": { + "External id": 254969,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561052.970, "dur": 0.509, + "args": { + "External id": 254970,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561053.880, "dur": 1.346, + "args": { + "External id": 254971,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561054.276, "dur": 0.883, + "args": { + "External id": 254972,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561055.492, "dur": 2.902, + "args": { + "External id": 254973,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561057.693, "dur": 0.639, + "args": { + "External id": 254974,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561058.791, "dur": 1.437, + "args": { + "External id": 254975,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561059.207, "dur": 0.959, + "args": { + "External id": 254976,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561083.093, "dur": 3.319, + "args": { + "External id": 254977,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561084.001, "dur": 2.337, + "args": { + "External id": 254978,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561086.686, "dur": 1.177, + "args": { + "External id": 254979,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561087.113, "dur": 0.687, + "args": { + "External id": 254980,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561088.259, "dur": 1.149, + "args": { + "External id": 254981,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561088.675, "dur": 0.668, + "args": { + "External id": 254982,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561089.681, "dur": 2.859, + "args": { + "External id": 254983,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561091.931, "dur": 0.545, + "args": { + "External id": 254984,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561092.942, "dur": 1.323, + "args": { + "External id": 254985,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561093.523, "dur": 0.677, + "args": { + "External id": 254986,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561094.514, "dur": 1.205, + "args": { + "External id": 254987,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561094.930, "dur": 0.721, + "args": { + "External id": 254988,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561096.086, "dur": 3.575, + "args": { + "External id": 254989,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561098.505, "dur": 1.093, + "args": { + "External id": 254990,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561099.941, "dur": 2.005, + "args": { + "External id": 254991,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561100.792, "dur": 1.089, + "args": { + "External id": 254992,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561102.220, "dur": 3.405, + "args": { + "External id": 254993,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561102.978, "dur": 2.576, + "args": { + "External id": 254994,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561105.885, "dur": 1.705, + "args": { + "External id": 254995,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561106.871, "dur": 0.656, + "args": { + "External id": 254996,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561107.896, "dur": 2.082, + "args": { + "External id": 254997,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561108.834, "dur": 1.078, + "args": { + "External id": 254998,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561110.222, "dur": 3.839, + "args": { + "External id": 254999,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561113.086, "dur": 0.910, + "args": { + "External id": 255000,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561114.349, "dur": 1.949, + "args": { + "External id": 255001,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561115.574, "dur": 0.662, + "args": { + "External id": 255002,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561116.558, "dur": 7.279, + "args": { + "External id": 255003,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561121.735, "dur": 2.025, + "args": { + "External id": 255004,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561124.202, "dur": 1.371, + "args": { + "External id": 255005,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561124.971, "dur": 0.539, + "args": { + "External id": 255006,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561125.831, "dur": 1.361, + "args": { + "External id": 255007,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561126.513, "dur": 0.614, + "args": { + "External id": 255008,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561127.445, "dur": 3.829, + "args": { + "External id": 255009,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561130.115, "dur": 1.096, + "args": { + "External id": 255010,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561131.593, "dur": 1.609, + "args": { + "External id": 255011,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561132.526, "dur": 0.603, + "args": { + "External id": 255012,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561133.468, "dur": 1.787, + "args": { + "External id": 255013,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561134.128, "dur": 1.060, + "args": { + "External id": 255014,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561135.529, "dur": 2.909, + "args": { + "External id": 255015,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561137.865, "dur": 0.504, + "args": { + "External id": 255016,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561138.685, "dur": 1.725, + "args": { + "External id": 255017,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561139.550, "dur": 0.794, + "args": { + "External id": 255018,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561140.662, "dur": 3.005, + "args": { + "External id": 255019,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561141.278, "dur": 2.321, + "args": { + "External id": 255020,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561143.934, "dur": 1.564, + "args": { + "External id": 255021,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561144.674, "dur": 0.760, + "args": { + "External id": 255022,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561145.785, "dur": 1.853, + "args": { + "External id": 255023,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561146.835, "dur": 0.734, + "args": { + "External id": 255024,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561147.887, "dur": 3.592, + "args": { + "External id": 255025,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561150.999, "dur": 0.417, + "args": { + "External id": 255026,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 4183438, "tid": 4183438, + "ts": 667918561151.766, "dur": 1.555, + "args": { + "External id": 255027,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561152.428, "dur": 0.831, + "args": { + "External id": 255028,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 4183438, "tid": 4183438, + "ts": 667918561174.620, "dur": 120.126, + "args": { + "External id": 255029,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 4183438, "tid": 4183438, + "ts": 667918561384.995, "dur": 127.422, + "args": { + "External id": 255030,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[219], [], [], [], []], "Ev Idx": 9253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 4183438, "tid": 4183438, + "ts": 667918561446.595, "dur": 46.740, + "args": { + "External id": 255031,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[219], [], [], [], []], "Ev Idx": 9254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 4183438, "tid": 4183438, + "ts": 667918561460.577, "dur": 1.354, + "args": { + "External id": 255032,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Redistribute", "pid": 4183438, "tid": 4183438, + "ts": 667918561834.781, "dur": 922.556, + "args": { + "External id": 255033,"Sequence number": 2987752, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "False"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 4183438, "tid": 4183438, + "ts": 667918561889.462, "dur": 49.289, + "args": { + "External id": 255034,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918561895.514, "dur": 1.370, + "args": { + "External id": 255035,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918561899.451, "dur": 0.528, + "args": { + "External id": 255036,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::all_reduce", "pid": 4183438, "tid": 4183438, + "ts": 667918561961.979, "dur": 445.141, + "args": { + "External id": 255037,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["float", "", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 4183438, "tid": 4183438, + "ts": 667918561965.670, "dur": 44.490, + "args": { + "External id": 255038,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 4183438, "tid": 4183438, + "ts": 667918561968.875, "dur": 9.016, + "args": { + "External id": 255039,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "0"], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918561972.960, "dur": 4.171, + "args": { + "External id": 255040,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 4183438, "tid": 4183438, + "ts": 667918561979.629, "dur": 29.895, + "args": { + "External id": 255041,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::allreduce_", "pid": 4183438, "tid": 4183438, + "ts": 667918562020.743, "dur": 383.097, + "args": { + "External id": 255042,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "-1"], "Input type": ["TensorList", "", "", "", "Scalar"], "Input Strides": [[[]], [], [], [], []], "Input Dims": [[[]], [], [], [], []], "Ev Idx": 9265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667918562050.234, "dur": 346.297, + "args": { + "External id": 255043,"Record function id": 0, "Collective name": "allreduce", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[[]], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1, "Process Group Name": "0", "Input type": ["TensorList", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[[]], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9266, "In msg nelems": 1 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:all_reduce", "pid": 4183438, "tid": 4183438, + "ts": 667918562067.138, "dur": 323.480, + "args": { + "External id": 255044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 4183438, "tid": 4183438, + "ts": 667918562475.451, "dur": 242.205, + "args": { + "External id": 255045,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::wait_tensor", "pid": 4183438, "tid": 4183438, + "ts": 667918562568.902, "dur": 30.749, + "args": { + "External id": 255046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 4183438, "tid": 4183438, + "ts": 667918562586.366, "dur": 4.614, + "args": { + "External id": 255047,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9270, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 4183438, "tid": 4183438, + "ts": 667918562628.056, "dur": 81.695, + "args": { + "External id": 255048,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918562630.888, "dur": 1.049, + "args": { + "External id": 255049,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918562633.163, "dur": 0.566, + "args": { + "External id": 255050,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_ToTorchTensor", "pid": 4183438, "tid": 4183438, + "ts": 667918562774.222, "dur": 34.903, + "args": { + "External id": 255051,"Sequence number": 2987753, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 4183438, "tid": 4183438, + "ts": 667918562794.746, "dur": 9.854, + "args": { + "External id": 255052,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 4183438, "tid": 4183438, + "ts": 667918562797.875, "dur": 6.477, + "args": { + "External id": 255053,"Record function id": 0, "Concrete Inputs": ["", "[]"], "Input type": ["float", "ScalarList"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 4183438, "tid": 4183438, + "ts": 667918563181.285, "dur": 52.517, + "args": { + "External id": 255054,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "double", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reciprocal", "pid": 4183438, "tid": 4183438, + "ts": 667918563244.481, "dur": 29.454, + "args": { + "External id": 255055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 4183438, "tid": 4183438, + "ts": 667918563285.552, "dur": 25.652, + "args": { + "External id": 255056,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "double"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clamp", "pid": 4183438, "tid": 4183438, + "ts": 667918563324.336, "dur": 25.883, + "args": { + "External id": 255057,"Record function id": 0, "Concrete Inputs": ["", "", "1."], "Input type": ["float", "", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918563328.415, "dur": 0.563, + "args": { + "External id": 255058,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 4183438, "tid": 4183438, + "ts": 667918563367.876, "dur": 0.448, + "args": { + "External id": 255059,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 9282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 4183438, "tid": 4183438, + "ts": 667918563456.240, "dur": 546.324, + "args": { + "External id": 255060,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 4183438, "tid": 4183438, + "ts": 667918563892.746, "dur": 82.787, + "args": { + "External id": 255061,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isnan", "pid": 4183438, "tid": 4183438, + "ts": 667918564041.727, "dur": 30.163, + "args": { + "External id": 255062,"Sequence number": 2987754, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 4183438, "tid": 4183438, + "ts": 667918564044.789, "dur": 26.493, + "args": { + "External id": 255063,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 4183438, "tid": 4183438, + "ts": 667918564075.890, "dur": 61.582, + "args": { + "External id": 255064,"Sequence number": 2987754, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 4183438, "tid": 4183438, + "ts": 667918564077.510, "dur": 59.778, + "args": { + "External id": 255065,"Sequence number": 2987754, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 4183438, "tid": 4183438, + "ts": 667918564079.376, "dur": 57.533, + "args": { + "External id": 255066,"Sequence number": 2987754, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isinf", "pid": 4183438, "tid": 4183438, + "ts": 667918564142.237, "dur": 73.246, + "args": { + "External id": 255067,"Sequence number": 2987754, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918564144.729, "dur": 30.774, + "args": { + "External id": 255068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 4183438, "tid": 4183438, + "ts": 667918564150.473, "dur": 2.626, + "args": { + "External id": 255069,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 4183438, "tid": 4183438, + "ts": 667918564154.660, "dur": 20.378, + "args": { + "External id": 255070,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], [1]], "Input Dims": [[], [0]], "Ev Idx": 9293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 4183438, "tid": 4183438, + "ts": 667918564158.993, "dur": 2.363, + "args": { + "External id": 255071,"Record function id": 0, "Concrete Inputs": ["", "[]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 4183438, "tid": 4183438, + "ts": 667918564177.571, "dur": 36.442, + "args": { + "External id": 255072,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 4183438, "tid": 4183438, + "ts": 667918564219.038, "dur": 35.223, + "args": { + "External id": 255073,"Sequence number": 2987754, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 4183438, "tid": 4183438, + "ts": 667918564219.941, "dur": 34.174, + "args": { + "External id": 255074,"Sequence number": 2987754, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 4183438, "tid": 4183438, + "ts": 667918564220.696, "dur": 33.032, + "args": { + "External id": 255075,"Sequence number": 2987754, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9298 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#OptimizersContainer.step", "pid": 4183438, "tid": 4183438, + "ts": 667918564283.779, "dur": 4402.983, + "args": { + "External id": 255076,"Record function id": 0, "Ev Idx": 9299 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#AdamW.step", "pid": 4183438, "tid": 4183438, + "ts": 667918564314.479, "dur": 4314.653, + "args": { + "External id": 255077,"Record function id": 0, "Ev Idx": 9300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_add_", "pid": 4183438, "tid": 4183438, + "ts": 667918565320.191, "dur": 200.675, + "args": { + "External id": 255078,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565336.304, "dur": 1.098, + "args": { + "External id": 255079,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565338.477, "dur": 0.185, + "args": { + "External id": 255080,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565339.162, "dur": 0.053, + "args": { + "External id": 255081,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565339.558, "dur": 0.397, + "args": { + "External id": 255082,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565340.513, "dur": 0.276, + "args": { + "External id": 255083,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565341.246, "dur": 0.215, + "args": { + "External id": 255084,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565342.055, "dur": 0.063, + "args": { + "External id": 255085,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565342.533, "dur": 0.188, + "args": { + "External id": 255086,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565343.082, "dur": 0.092, + "args": { + "External id": 255087,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565343.600, "dur": 0.142, + "args": { + "External id": 255088,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565344.128, "dur": 0.210, + "args": { + "External id": 255089,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565344.755, "dur": 0.072, + "args": { + "External id": 255090,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565345.259, "dur": 0.236, + "args": { + "External id": 255091,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565345.902, "dur": 0.200, + "args": { + "External id": 255092,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565346.449, "dur": 0.064, + "args": { + "External id": 255093,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565347.018, "dur": 0.067, + "args": { + "External id": 255094,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565347.628, "dur": 0.067, + "args": { + "External id": 255095,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565348.068, "dur": 0.065, + "args": { + "External id": 255096,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565348.548, "dur": 0.308, + "args": { + "External id": 255097,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565349.214, "dur": 0.224, + "args": { + "External id": 255098,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565349.797, "dur": 0.065, + "args": { + "External id": 255099,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565350.239, "dur": 0.057, + "args": { + "External id": 255100,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565350.654, "dur": 0.067, + "args": { + "External id": 255101,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565351.081, "dur": 0.065, + "args": { + "External id": 255102,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565351.573, "dur": 0.062, + "args": { + "External id": 255103,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565352.084, "dur": 0.069, + "args": { + "External id": 255104,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565352.568, "dur": 0.056, + "args": { + "External id": 255105,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565353.069, "dur": 0.070, + "args": { + "External id": 255106,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565353.444, "dur": 0.070, + "args": { + "External id": 255107,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565353.914, "dur": 0.055, + "args": { + "External id": 255108,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565354.373, "dur": 0.065, + "args": { + "External id": 255109,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565354.804, "dur": 0.067, + "args": { + "External id": 255110,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565355.498, "dur": 0.068, + "args": { + "External id": 255111,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565355.964, "dur": 0.066, + "args": { + "External id": 255112,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565356.371, "dur": 0.071, + "args": { + "External id": 255113,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565356.780, "dur": 0.073, + "args": { + "External id": 255114,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565357.191, "dur": 0.067, + "args": { + "External id": 255115,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565357.765, "dur": 0.109, + "args": { + "External id": 255116,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565358.196, "dur": 0.065, + "args": { + "External id": 255117,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565358.689, "dur": 0.065, + "args": { + "External id": 255118,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565359.088, "dur": 0.065, + "args": { + "External id": 255119,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565359.520, "dur": 0.063, + "args": { + "External id": 255120,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565360.014, "dur": 0.067, + "args": { + "External id": 255121,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565360.792, "dur": 0.067, + "args": { + "External id": 255122,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565361.274, "dur": 0.052, + "args": { + "External id": 255123,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565361.590, "dur": 0.208, + "args": { + "External id": 255124,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565362.244, "dur": 0.049, + "args": { + "External id": 255125,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565362.729, "dur": 0.064, + "args": { + "External id": 255126,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565363.509, "dur": 0.072, + "args": { + "External id": 255127,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565364.021, "dur": 0.067, + "args": { + "External id": 255128,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565364.554, "dur": 0.067, + "args": { + "External id": 255129,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565365.006, "dur": 0.062, + "args": { + "External id": 255130,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565365.646, "dur": 0.061, + "args": { + "External id": 255131,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565366.114, "dur": 0.064, + "args": { + "External id": 255132,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565366.561, "dur": 0.066, + "args": { + "External id": 255133,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565366.934, "dur": 0.056, + "args": { + "External id": 255134,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565367.383, "dur": 0.056, + "args": { + "External id": 255135,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565367.826, "dur": 0.055, + "args": { + "External id": 255136,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565368.235, "dur": 0.061, + "args": { + "External id": 255137,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565368.641, "dur": 0.056, + "args": { + "External id": 255138,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565369.050, "dur": 0.063, + "args": { + "External id": 255139,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565369.567, "dur": 0.070, + "args": { + "External id": 255140,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565370.154, "dur": 0.053, + "args": { + "External id": 255141,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565370.653, "dur": 0.065, + "args": { + "External id": 255142,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565371.115, "dur": 0.067, + "args": { + "External id": 255143,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565371.560, "dur": 0.066, + "args": { + "External id": 255144,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565371.987, "dur": 0.066, + "args": { + "External id": 255145,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565372.357, "dur": 0.064, + "args": { + "External id": 255146,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565372.780, "dur": 0.066, + "args": { + "External id": 255147,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565373.279, "dur": 0.063, + "args": { + "External id": 255148,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565373.944, "dur": 0.064, + "args": { + "External id": 255149,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565374.277, "dur": 0.065, + "args": { + "External id": 255150,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565374.689, "dur": 0.083, + "args": { + "External id": 255151,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565375.224, "dur": 0.061, + "args": { + "External id": 255152,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565375.721, "dur": 0.065, + "args": { + "External id": 255153,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565376.454, "dur": 0.066, + "args": { + "External id": 255154,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565376.933, "dur": 0.064, + "args": { + "External id": 255155,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565377.337, "dur": 0.064, + "args": { + "External id": 255156,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565377.849, "dur": 0.051, + "args": { + "External id": 255157,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565378.266, "dur": 0.067, + "args": { + "External id": 255158,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565378.713, "dur": 0.068, + "args": { + "External id": 255159,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565379.191, "dur": 0.068, + "args": { + "External id": 255160,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565379.646, "dur": 0.067, + "args": { + "External id": 255161,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565379.988, "dur": 0.065, + "args": { + "External id": 255162,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565380.454, "dur": 0.100, + "args": { + "External id": 255163,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565380.966, "dur": 0.105, + "args": { + "External id": 255164,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565381.404, "dur": 0.054, + "args": { + "External id": 255165,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565381.725, "dur": 0.177, + "args": { + "External id": 255166,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565382.170, "dur": 0.081, + "args": { + "External id": 255167,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565382.791, "dur": 0.189, + "args": { + "External id": 255168,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565383.317, "dur": 0.208, + "args": { + "External id": 255169,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565384.004, "dur": 0.090, + "args": { + "External id": 255170,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565384.533, "dur": 0.224, + "args": { + "External id": 255171,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565385.264, "dur": 0.074, + "args": { + "External id": 255172,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565385.735, "dur": 0.053, + "args": { + "External id": 255173,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565386.136, "dur": 0.053, + "args": { + "External id": 255174,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565386.541, "dur": 0.195, + "args": { + "External id": 255175,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565387.016, "dur": 0.070, + "args": { + "External id": 255176,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565387.466, "dur": 0.091, + "args": { + "External id": 255177,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565387.920, "dur": 0.061, + "args": { + "External id": 255178,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565388.338, "dur": 0.063, + "args": { + "External id": 255179,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565389.178, "dur": 0.050, + "args": { + "External id": 255180,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565389.716, "dur": 0.067, + "args": { + "External id": 255181,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565390.124, "dur": 0.057, + "args": { + "External id": 255182,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565390.485, "dur": 0.062, + "args": { + "External id": 255183,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565390.845, "dur": 0.060, + "args": { + "External id": 255184,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565391.229, "dur": 0.059, + "args": { + "External id": 255185,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565391.643, "dur": 0.056, + "args": { + "External id": 255186,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565392.118, "dur": 0.055, + "args": { + "External id": 255187,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565392.575, "dur": 0.049, + "args": { + "External id": 255188,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565393.011, "dur": 0.058, + "args": { + "External id": 255189,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565393.473, "dur": 0.062, + "args": { + "External id": 255190,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565393.910, "dur": 0.056, + "args": { + "External id": 255191,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565394.364, "dur": 0.060, + "args": { + "External id": 255192,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565394.881, "dur": 0.045, + "args": { + "External id": 255193,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565395.365, "dur": 0.056, + "args": { + "External id": 255194,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565395.857, "dur": 0.055, + "args": { + "External id": 255195,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565396.361, "dur": 0.053, + "args": { + "External id": 255196,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565396.781, "dur": 0.063, + "args": { + "External id": 255197,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565397.273, "dur": 0.065, + "args": { + "External id": 255198,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565397.784, "dur": 0.061, + "args": { + "External id": 255199,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565398.288, "dur": 0.055, + "args": { + "External id": 255200,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565398.668, "dur": 0.066, + "args": { + "External id": 255201,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565399.075, "dur": 0.066, + "args": { + "External id": 255202,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565399.470, "dur": 0.067, + "args": { + "External id": 255203,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565399.902, "dur": 0.108, + "args": { + "External id": 255204,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565400.303, "dur": 0.065, + "args": { + "External id": 255205,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565400.690, "dur": 0.064, + "args": { + "External id": 255206,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565401.353, "dur": 0.064, + "args": { + "External id": 255207,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565401.758, "dur": 0.090, + "args": { + "External id": 255208,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565402.118, "dur": 0.067, + "args": { + "External id": 255209,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565402.616, "dur": 0.065, + "args": { + "External id": 255210,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565403.048, "dur": 0.096, + "args": { + "External id": 255211,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565403.536, "dur": 0.062, + "args": { + "External id": 255212,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565403.896, "dur": 0.064, + "args": { + "External id": 255213,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565404.410, "dur": 0.200, + "args": { + "External id": 255214,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565404.982, "dur": 0.066, + "args": { + "External id": 255215,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565405.541, "dur": 0.056, + "args": { + "External id": 255216,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565405.904, "dur": 0.060, + "args": { + "External id": 255217,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565406.318, "dur": 0.056, + "args": { + "External id": 255218,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565406.738, "dur": 0.056, + "args": { + "External id": 255219,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565407.151, "dur": 0.053, + "args": { + "External id": 255220,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565407.500, "dur": 0.054, + "args": { + "External id": 255221,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565407.870, "dur": 0.060, + "args": { + "External id": 255222,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565408.197, "dur": 0.056, + "args": { + "External id": 255223,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565408.510, "dur": 0.056, + "args": { + "External id": 255224,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565408.883, "dur": 0.055, + "args": { + "External id": 255225,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565409.275, "dur": 0.045, + "args": { + "External id": 255226,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565409.582, "dur": 0.051, + "args": { + "External id": 255227,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565410.291, "dur": 0.063, + "args": { + "External id": 255228,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565410.628, "dur": 0.049, + "args": { + "External id": 255229,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565411.087, "dur": 0.061, + "args": { + "External id": 255230,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565411.413, "dur": 0.056, + "args": { + "External id": 255231,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565412.025, "dur": 0.067, + "args": { + "External id": 255232,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565412.332, "dur": 0.050, + "args": { + "External id": 255233,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565413.125, "dur": 0.067, + "args": { + "External id": 255234,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565413.437, "dur": 0.200, + "args": { + "External id": 255235,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565414.339, "dur": 0.301, + "args": { + "External id": 255236,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565414.890, "dur": 0.051, + "args": { + "External id": 255237,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565415.662, "dur": 0.183, + "args": { + "External id": 255238,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565416.088, "dur": 0.224, + "args": { + "External id": 255239,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565416.817, "dur": 0.219, + "args": { + "External id": 255240,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565417.278, "dur": 0.061, + "args": { + "External id": 255241,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565418.057, "dur": 0.088, + "args": { + "External id": 255242,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565418.389, "dur": 0.161, + "args": { + "External id": 255243,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565419.148, "dur": 0.056, + "args": { + "External id": 255244,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565419.451, "dur": 0.047, + "args": { + "External id": 255245,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565420.293, "dur": 0.057, + "args": { + "External id": 255246,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565420.595, "dur": 0.051, + "args": { + "External id": 255247,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565421.546, "dur": 0.061, + "args": { + "External id": 255248,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565421.865, "dur": 0.058, + "args": { + "External id": 255249,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565422.677, "dur": 0.057, + "args": { + "External id": 255250,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565422.981, "dur": 0.054, + "args": { + "External id": 255251,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565423.830, "dur": 0.067, + "args": { + "External id": 255252,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565424.198, "dur": 0.059, + "args": { + "External id": 255253,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565424.930, "dur": 0.059, + "args": { + "External id": 255254,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565425.237, "dur": 0.054, + "args": { + "External id": 255255,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565425.982, "dur": 0.056, + "args": { + "External id": 255256,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565426.282, "dur": 0.081, + "args": { + "External id": 255257,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565427.178, "dur": 0.057, + "args": { + "External id": 255258,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565427.476, "dur": 0.050, + "args": { + "External id": 255259,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565428.192, "dur": 0.057, + "args": { + "External id": 255260,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565428.494, "dur": 0.049, + "args": { + "External id": 255261,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565429.058, "dur": 0.049, + "args": { + "External id": 255262,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565429.349, "dur": 0.055, + "args": { + "External id": 255263,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565430.107, "dur": 0.068, + "args": { + "External id": 255264,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565430.416, "dur": 0.058, + "args": { + "External id": 255265,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565430.978, "dur": 0.058, + "args": { + "External id": 255266,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565431.284, "dur": 0.059, + "args": { + "External id": 255267,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565431.859, "dur": 0.064, + "args": { + "External id": 255268,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565432.172, "dur": 0.051, + "args": { + "External id": 255269,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565432.884, "dur": 0.066, + "args": { + "External id": 255270,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565433.198, "dur": 0.054, + "args": { + "External id": 255271,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565433.829, "dur": 0.160, + "args": { + "External id": 255272,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565434.236, "dur": 0.206, + "args": { + "External id": 255273,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565435.266, "dur": 0.072, + "args": { + "External id": 255274,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565435.583, "dur": 0.082, + "args": { + "External id": 255275,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565436.158, "dur": 0.092, + "args": { + "External id": 255276,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565436.496, "dur": 0.052, + "args": { + "External id": 255277,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565437.254, "dur": 0.069, + "args": { + "External id": 255278,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565437.566, "dur": 0.056, + "args": { + "External id": 255279,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565438.288, "dur": 0.065, + "args": { + "External id": 255280,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565438.596, "dur": 0.057, + "args": { + "External id": 255281,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565439.202, "dur": 0.220, + "args": { + "External id": 255282,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565439.672, "dur": 0.054, + "args": { + "External id": 255283,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565440.469, "dur": 0.066, + "args": { + "External id": 255284,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565440.780, "dur": 0.057, + "args": { + "External id": 255285,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565441.318, "dur": 0.067, + "args": { + "External id": 255286,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565441.643, "dur": 0.054, + "args": { + "External id": 255287,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565442.151, "dur": 0.067, + "args": { + "External id": 255288,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565442.469, "dur": 0.054, + "args": { + "External id": 255289,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565443.003, "dur": 0.066, + "args": { + "External id": 255290,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565443.313, "dur": 0.049, + "args": { + "External id": 255291,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565443.844, "dur": 0.063, + "args": { + "External id": 255292,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565444.150, "dur": 0.053, + "args": { + "External id": 255293,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565444.876, "dur": 0.069, + "args": { + "External id": 255294,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565445.187, "dur": 0.047, + "args": { + "External id": 255295,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565445.957, "dur": 0.048, + "args": { + "External id": 255296,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 4183438, "tid": 4183438, + "ts": 667918565446.443, "dur": 0.069, + "args": { + "External id": 255297,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 4183438, "tid": 4183438, + "ts": 667918565935.678, "dur": 2614.147, + "args": { + "External id": 255298,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "0.00028982697342469331", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 4183438, "tid": 4183438, + "ts": 667918568205.012, "dur": 220.070, + "args": { + "External id": 255299,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "0.00028982697342469331", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9522 + } + }, + { + "name": "process_name", "ph": "M", "ts": 667917756101.583, "pid": 4183438, "tid": 0, + "args": { + "name": "python3.12" + } + }, + { + "name": "process_labels", "ph": "M", "ts": 667917756101.583, "pid": 4183438, "tid": 0, + "args": { + "labels": "CPU" + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 667917756101.583, "pid": 4183438, "tid": 0, + "args": { + "sort_index": 4183438 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 667917756101.583, "pid": 4183438, "tid": 31367, + "args": { + "name": "thread 31367 (pt_autograd_2)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 667917756101.583, "pid": 4183438, "tid": 31367, + "args": { + "sort_index": 31367 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 667917756101.583, "pid": 4183438, "tid": 31367, + "args": { + "name": "thread 31367 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 667917756101.583, "pid": 4183438, "tid": 31367, + "args": { + "sort_index": 31367 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 667917756101.583, "pid": 4183438, "tid": 4183438, + "args": { + "name": "thread 4183438 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 667917756101.583, "pid": 4183438, "tid": 4183438, + "args": { + "sort_index": 4183438 + } + }, + { + "ph": "X", "cat": "Trace", "ts": 667917756035.163, "dur": 812868.807, + "pid": "Spans", "tid": "PyTorch Profiler", + "name": "PyTorch Profiler (0)", + "args": { + "Op count": 0 + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 667917756035.163, + "pid": "Spans", "tid": 0, + "args": { + "sort_index": 536870912 + } + }, + { + "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g", + "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 667917756035.163 + }, + { + "name": "Record Window End", "ph": "i", "s": "g", + "pid": "", "tid": "", "ts": 667918613630.023 + } + ], + "traceName": "exp/mtp.340M.batch16.seqlen4096.context4096.warmup1000.update1.steps100000.lr3e-4.cosine/profile_trace/iteration_13312/rank2_trace.json", + "displayTimeUnit": "ms", + "baseTimeNanoseconds": 1751410836000000000 +} \ No newline at end of file