logs / cuda_matmul_trace_vllm.json
fxmarty's picture
Upload cuda_matmul_trace_vllm.json with huggingface_hub
0fbd324 verified
{
"schemaVersion": 1,
"deviceProperties": [
{
"id": 0, "name": "AMD Instinct MI300X VF", "totalGlobalMem": 205571227648,
"computeMajor": 9, "computeMinor": 4,
"maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048,
"regsPerBlock": 65536, "warpSize": 64,
"sharedMemPerBlock": 65536, "numSms": 304
, "maxSharedMemoryPerMultiProcessor": 65536
}
],
"roctracer_version": 4.1,
"hip_runtime_version": 60342131,
"hip_driver_version": 60342131,
"trace_id": "3B55A95D004243E49AFCA3BA14757EE7",
"traceEvents": [
{
"ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 455, "tid": 455,
"ts": 7429064525509.327, "dur": 297396.921,
"args": {
"External id": 1,"Record function id": 0, "Fwd thread id": 0, "Sequence number": 0, "finished": true, "Ev Idx": 0
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 455, "tid": 455,
"ts": 7429064525539.719, "dur": 297357.448,
"args": {
"External id": 2,"Record function id": 0, "Fwd thread id": 0, "Sequence number": 0, "finished": true, "Ev Idx": 1
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 455, "tid": 455,
"ts": 7429064525887.174, "dur": 39.320,
"args": {
"External id": 3,"Record function id": 0, "finished": true, "Ev Idx": 2
}
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipStreamIsCapturing", "pid": 455, "tid": 455,
"ts": 7429064525586.815, "dur": 2.314,
"args": {
"External id": 2, "cid": 290, "correlation": 1
}
},
{
"ph": "f", "id": 1, "pid": 455, "tid": 455, "ts": 7429064525586.815,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipMalloc", "pid": 455, "tid": 455,
"ts": 7429064525591.437, "dur": 75.397,
"args": {
"External id": 2, "bytes": 65011712, "cid": 108, "correlation": 2, "ptr": "0x55fa15629e50"
}
},
{
"ph": "s", "id": 2, "pid": 455, "tid": 455, "ts": 7429064525591.437,
"cat": "ac2g", "name": "ac2g"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipMalloc", "pid": 455, "tid": 455,
"ts": 7429064525740.086, "dur": 35.336,
"args": {
"External id": 2, "bytes": 2621440, "cid": 108, "correlation": 3, "ptr": "0x7ffe7048af50"
}
},
{
"ph": "s", "id": 3, "pid": 455, "tid": 455, "ts": 7429064525740.086,
"cat": "ac2g", "name": "ac2g"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipMemset", "pid": 455, "tid": 455,
"ts": 7429064525782.260, "dur": 14.254,
"args": {
"External id": 2, "cid": 154, "correlation": 4
}
},
{
"ph": "f", "id": 4, "pid": 455, "tid": 455, "ts": 7429064525782.260,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipGetDevicePropertiesR0600", "pid": 455, "tid": 455,
"ts": 7429064525799.123, "dur": 0.763,
"args": {
"External id": 2, "cid": 370, "correlation": 5
}
},
{
"ph": "f", "id": 5, "pid": 455, "tid": 455, "ts": 7429064525799.123,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipStreamIsCapturing", "pid": 455, "tid": 455,
"ts": 7429064525892.788, "dur": 0.470,
"args": {
"External id": 3, "cid": 290, "correlation": 6
}
},
{
"ph": "f", "id": 6, "pid": 455, "tid": 455, "ts": 7429064525892.788,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipMalloc", "pid": 455, "tid": 455,
"ts": 7429064525893.749, "dur": 29.083,
"args": {
"External id": 3, "bytes": 79691776, "cid": 108, "correlation": 7, "ptr": "0x7ffe7048a140"
}
},
{
"ph": "s", "id": 7, "pid": 455, "tid": 455, "ts": 7429064525893.749,
"cat": "ac2g", "name": "ac2g"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipGetDevicePropertiesR0600", "pid": 455, "tid": 455,
"ts": 7429064525932.854, "dur": 0.304,
"args": {
"External id": 2, "cid": 370, "correlation": 8
}
},
{
"ph": "f", "id": 8, "pid": 455, "tid": 455, "ts": 7429064525932.854,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipGetDevicePropertiesR0600", "pid": 455, "tid": 455,
"ts": 7429064525994.881, "dur": 0.238,
"args": {
"External id": 2, "cid": 370, "correlation": 9
}
},
{
"ph": "f", "id": 9, "pid": 455, "tid": 455, "ts": 7429064525994.881,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipGetDevicePropertiesR0600", "pid": 455, "tid": 455,
"ts": 7429064525996.286, "dur": 0.169,
"args": {
"External id": 2, "cid": 370, "correlation": 10
}
},
{
"ph": "f", "id": 10, "pid": 455, "tid": 455, "ts": 7429064525996.286,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipModuleLoad", "pid": 455, "tid": 455,
"ts": 7429064528594.963, "dur": 103603.691,
"args": {
"External id": 2, "cid": 170, "correlation": 11
}
},
{
"ph": "f", "id": 11, "pid": 455, "tid": 455, "ts": 7429064528594.963,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipGetDevicePropertiesR0600", "pid": 455, "tid": 455,
"ts": 7429064632214.111, "dur": 1.114,
"args": {
"External id": 2, "cid": 370, "correlation": 12
}
},
{
"ph": "f", "id": 12, "pid": 455, "tid": 455, "ts": 7429064632214.111,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipGetDevicePropertiesR0600", "pid": 455, "tid": 455,
"ts": 7429064745562.959, "dur": 2.839,
"args": {
"External id": 2, "cid": 370, "correlation": 13
}
},
{
"ph": "f", "id": 13, "pid": 455, "tid": 455, "ts": 7429064745562.959,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipGetDevicePropertiesR0600", "pid": 455, "tid": 455,
"ts": 7429064745586.023, "dur": 0.696,
"args": {
"External id": 2, "cid": 370, "correlation": 14
}
},
{
"ph": "f", "id": 14, "pid": 455, "tid": 455, "ts": 7429064745586.023,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipModuleLoad", "pid": 455, "tid": 455,
"ts": 7429064745602.158, "dur": 77217.980,
"args": {
"External id": 2, "cid": 170, "correlation": 15
}
},
{
"ph": "f", "id": 15, "pid": 455, "tid": 455, "ts": 7429064745602.158,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipExtModuleLaunchKernel", "pid": 455, "tid": 455,
"ts": 7429064822844.054, "dur": 20.100,
"args": {
"External id": 2, "kernel": "Cijk_Ailk_Bljk_SB_Bias_AS_SAV_UserArgs_MT128x64x32_MI32x32x1_SN_LDSB0_AFC1_AFEM1_AFEM1_ASEM1_CLR1_CADS0_EPS0_GRVWA4_GRVWB4_GSUAMB_ISA942_IU1_K1_LBSPPA0_LBSPPB128_LBSPPM0_LPA0_LPB4_LPM0_LRVW4_LWPMn1_MIAV0_MIWT2_1_MO1_NTn1_NTA0_NTB0_NTC0_NTD0_NTM0_NEPBS0_NLCA1_NLCB1_ONLL1_PGR2_PLR1_PKA1_SIA3_SS1_SPO0_SRVW0_SSO0_SVW1_TLDS1_ULSGRO0_USL1_UIOFGRO0_USFGROn1_VSn1_VWA1_VWB1_WSGRA0_WSGRB1_WS64_WG64_4_1_WGMXCC1_WGMXCCG0", "cid": 65, "correlation": 16, "grid": [516096, 1, 1], "block": [256, 1, 1], "shared memory": 0
}
},
{
"ph": "s", "id": 16, "pid": 455, "tid": 455, "ts": 7429064822844.054,
"cat": "ac2g", "name": "ac2g"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipDeviceSynchronize", "pid": 455, "tid": 455,
"ts": 7429064823009.505, "dur": 1348.012,
"args": {
"cid": 48, "correlation": 17
}
},
{
"ph": "f", "id": 17, "pid": 455, "tid": 455, "ts": 7429064823009.505,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "hipDeviceSynchronize", "pid": 455, "tid": 455,
"ts": 7429064824390.626, "dur": 0.870,
"args": {
"cid": 48, "correlation": 18
}
},
{
"ph": "f", "id": 18, "pid": 455, "tid": 455, "ts": 7429064824390.626,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "gpu_memset", "name": "Memset (Device)", "pid": 2, "tid": 0,
"ts": 7429064525818.773, "dur": 5.652,
"args": {
"External id": 2, "device": 2, "stream": 0, "correlation": 4, "kind": "Device"
}
},
{
"ph": "f", "id": 4, "pid": 2, "tid": 0, "ts": 7429064525818.773,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "kernel", "name": "Cijk_Ailk_Bljk_SB_Bias_AS_SAV_UserArgs_MT128x64x32_MI32x32x1_SN_LDSB0_AFC1_AFEM1_AFEM1_ASEM1_CLR1_CADS0_EPS0_GRVWA4_GRVWB4_GSUAMB_ISA942_IU1_K1_LBSPPA0_LBSPPB128_LBSPPM0_LPA0_LPB4_LPM0_LRVW4_LWPMn1_MIAV0_MIWT2_1_MO1_NTn1_NTA0_NTB0_NTC0_NTD0_NTM0_NEPBS0_NLCA1_NLCB1_ONLL1_PGR2_PLR1_PKA1_SIA3_SS1_SPO0_SRVW0_SSO0_SVW1_TLDS1_ULSGRO0_USL1_UIOFGRO0_USFGROn1_VSn1_VWA1_VWB1_WSGRA0_WSGRB1_WS64_WG64_4_1_WGMXCC1_WGMXCCG0", "pid": 2, "tid": 0,
"ts": 7429064822886.347, "dur": 1475.199,
"args": {
"External id": 2, "device": 2, "stream": 0, "correlation": 16, "kind": "Dispatch Kernel", "grid": [516096, 1, 1], "block": [256, 1, 1]
}
},
{
"ph": "f", "id": 16, "pid": 2, "tid": 0, "ts": 7429064822886.347,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 455, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 455, "tid": 0,
"args": {
"labels": "CPU"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 455, "tid": 0,
"args": {
"sort_index": 455
}
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 0, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 0, "tid": 0,
"args": {
"labels": "GPU 0"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 0, "tid": 0,
"args": {
"sort_index": 5000000
}
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 1, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 1, "tid": 0,
"args": {
"labels": "GPU 1"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 1, "tid": 0,
"args": {
"sort_index": 5000001
}
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 2, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 2, "tid": 0,
"args": {
"labels": "GPU 2"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 2, "tid": 0,
"args": {
"sort_index": 5000002
}
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 3, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 3, "tid": 0,
"args": {
"labels": "GPU 3"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 3, "tid": 0,
"args": {
"sort_index": 5000003
}
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 4, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 4, "tid": 0,
"args": {
"labels": "GPU 4"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 4, "tid": 0,
"args": {
"sort_index": 5000004
}
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 5, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 5, "tid": 0,
"args": {
"labels": "GPU 5"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 5, "tid": 0,
"args": {
"sort_index": 5000005
}
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 6, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 6, "tid": 0,
"args": {
"labels": "GPU 6"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 6, "tid": 0,
"args": {
"sort_index": 5000006
}
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 7, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 7, "tid": 0,
"args": {
"labels": "GPU 7"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 7, "tid": 0,
"args": {
"sort_index": 5000007
}
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 8, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 8, "tid": 0,
"args": {
"labels": "GPU 8"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 8, "tid": 0,
"args": {
"sort_index": 5000008
}
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 9, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 9, "tid": 0,
"args": {
"labels": "GPU 9"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 9, "tid": 0,
"args": {
"sort_index": 5000009
}
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 10, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 10, "tid": 0,
"args": {
"labels": "GPU 10"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 10, "tid": 0,
"args": {
"sort_index": 5000010
}
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 11, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 11, "tid": 0,
"args": {
"labels": "GPU 11"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 11, "tid": 0,
"args": {
"sort_index": 5000011
}
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 12, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 12, "tid": 0,
"args": {
"labels": "GPU 12"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 12, "tid": 0,
"args": {
"sort_index": 5000012
}
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 13, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 13, "tid": 0,
"args": {
"labels": "GPU 13"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 13, "tid": 0,
"args": {
"sort_index": 5000013
}
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 14, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 14, "tid": 0,
"args": {
"labels": "GPU 14"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 14, "tid": 0,
"args": {
"sort_index": 5000014
}
},
{
"name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 15, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 15, "tid": 0,
"args": {
"labels": "GPU 15"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 15, "tid": 0,
"args": {
"sort_index": 5000015
}
},
{
"name": "thread_name", "ph": "M", "ts": 7429064519991.639, "pid": 2, "tid": 0,
"args": {
"name": "stream 0 "
}
},
{
"name": "thread_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 2, "tid": 0,
"args": {
"sort_index": 0
}
},
{
"name": "thread_name", "ph": "M", "ts": 7429064519991.639, "pid": 455, "tid": 455,
"args": {
"name": "thread 455 (python)"
}
},
{
"name": "thread_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 455, "tid": 455,
"args": {
"sort_index": 455
}
},
{
"name": "thread_name", "ph": "M", "ts": 7429064519991.639, "pid": 455, "tid": 455,
"args": {
"name": "thread 455 (python)"
}
},
{
"name": "thread_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 455, "tid": 455,
"args": {
"sort_index": 455
}
},
{
"ph": "X", "cat": "Trace", "ts": 7429064519947.431, "dur": 304458.658,
"pid": "Spans", "tid": "PyTorch Profiler",
"name": "PyTorch Profiler (0)",
"args": {
"Op count": 0
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 7429064519947.431,
"pid": "Spans", "tid": 0,
"args": {
"sort_index": 536870912
}
},
{
"name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g",
"pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 7429064519947.431
},
{
"name": "Record Window End", "ph": "i", "s": "g",
"pid": "", "tid": "", "ts": 7429064824552.924
}
],
"traceName": "cuda_matmul_trace.json",
"displayTimeUnit": "ms",
"baseTimeNanoseconds": 1743521598000000000
}