nvdriver.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. import pathlib, re, ctypes, mmap, collections, struct, functools, os, copy
  2. import tinygrad.runtime.autogen.nv_gpu as nv_gpu
  3. from typing import Optional, Any
  4. from tinygrad.helpers import from_mv
  5. from extra.mockgpu.driver import VirtDriver, VirtFileDesc, TextFileDesc, DirFileDesc, VirtFile
  6. from extra.mockgpu.nv.nvgpu import NVGPU
  7. MAP_FIXED = 0x10
  8. libc = ctypes.CDLL(ctypes.util.find_library("c"))
  9. libc.mmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long]
  10. libc.mmap.restype = ctypes.c_void_p
  11. libc.munmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t]
  12. libc.munmap.restype = ctypes.c_int
  13. NVSubDevice = collections.namedtuple('NVSubDevice', ['device'])
  14. NVUserMode = collections.namedtuple('NVUserMode', ['subdevice'])
  15. NVVASpace = collections.namedtuple('NVVASpace', ['device'])
  16. NVAllocation = collections.namedtuple('NVAllocation', ['device', 'size'])
  17. NVChannelGroup = collections.namedtuple('NVChannelGroup', ['device'])
  18. NVContextShare = collections.namedtuple('NVContextShare', ['channel_group'])
  19. NVGPFIFO = collections.namedtuple('NVGPFIFO', ['device', 'token'])
  20. class NVCtlFileDesc(VirtFileDesc):
  21. def __init__(self, fd, driver):
  22. super().__init__(fd)
  23. self.driver = driver
  24. def ioctl(self, fd, request, argp): return self.driver.ctl_ioctl(request, argp)
  25. def mmap(self, start, sz, prot, flags, fd, offset): return libc.mmap(start, sz, prot, flags|mmap.MAP_ANONYMOUS, -1, 0)
  26. class NVUVMFileDesc(VirtFileDesc):
  27. def __init__(self, fd, driver):
  28. super().__init__(fd)
  29. self.driver = driver
  30. def ioctl(self, fd, request, argp): return self.driver.uvm_ioctl(request, argp)
  31. def mmap(self, start, sz, prot, flags, fd, offset): return libc.mmap(start, sz, prot, flags|mmap.MAP_ANONYMOUS, -1, 0)
  32. class NVDevFileDesc(VirtFileDesc):
  33. def __init__(self, fd, driver, gpu):
  34. super().__init__(fd)
  35. self.driver, self.gpu = driver, gpu
  36. self._mapping_userland = False
  37. def ioctl(self, fd, request, argp): return self.driver.dev_ioctl(self.gpu, request, argp)
  38. def mmap(self, start, sz, prot, flags, fd, offset):
  39. start = libc.mmap(start, sz, prot, flags|mmap.MAP_ANONYMOUS, -1, 0)
  40. if self._mapping_userland: self.driver.track_address(start, start+sz, lambda mv,off: None, lambda mv, off: self.driver._gpu_mmio_write(mv, off, self.gpu))
  41. return start
class NVDriver(VirtDriver):
  """Mock NVIDIA kernel-driver front end.

  Emulates the ioctl surface of /dev/nvidiactl, /dev/nvidia-uvm and the
  per-GPU /dev/nvidia<N> nodes just enough for the userspace NV backend to
  run against virtual NVGPU instances: RM objects are tracked by handle,
  memory is backed by host anonymous mappings, and GPFIFO doorbell writes
  drive queue execution.
  """
  def __init__(self, gpus=6):
    super().__init__()
    # Control and UVM device nodes; per-GPU nodes are appended by _prepare_gpu.
    self.tracked_files += [VirtFile('/dev/nvidiactl', functools.partial(NVCtlFileDesc, driver=self)),
      VirtFile('/dev/nvidia-uvm', functools.partial(NVUVMFileDesc, driver=self))]
    self.root_handle = None  # handle of the NV01_ROOT_CLIENT, assigned on first rm_alloc
    self.gpus = {}  # gpu_id -> NVGPU
    self.next_fd = (1 << 30)  # fake fd numbers, well above any real fd
    self.next_handle = 1  # next RM object handle to hand out
    self.object_by_handle = {}  # RM handle -> emulated object (NVGPU, NVSubDevice, NVGPFIFO, ...)
    self.opened_fds = {}  # fd -> VirtFileDesc
    self.next_doorbell = collections.defaultdict(int)
    for i in range(gpus): self._prepare_gpu(i)
  def _alloc_fd(self):
    # Hand out monotonically increasing fake fds.
    my_fd = self.next_fd
    self.next_fd = self.next_fd + 1
    return my_fd
  def _alloc_handle(self):
    # Hand out monotonically increasing RM object handles.
    handle = self.next_handle
    self.next_handle += 1
    return handle
  def _prepare_gpu(self, gpu_id):
    # Create the emulated GPU and expose its /dev/nvidia<N> node.
    self.gpus[gpu_id] = NVGPU(gpu_id)
    self.tracked_files += [VirtFile(f'/dev/nvidia{gpu_id}', functools.partial(NVDevFileDesc, driver=self, gpu=self.gpus[gpu_id]))]
  def open(self, name, flags, mode, virtfile):
    """Open a tracked device node, remembering the fd object for later lookup."""
    cl = virtfile.fdcls(self._alloc_fd())
    self.opened_fds[cl.fd] = cl
    return cl
  def rm_alloc(self, argp):
    """Emulate NV_ESC_RM_ALLOC: create the RM object named by hClass, assign it a
    fresh handle in hObjectNew, and register it in object_by_handle. Returns 0."""
    struct = nv_gpu.NVOS21_PARAMETERS.from_address(argp)  # NOTE(review): shadows the stdlib struct module (module unused in this class)
    params_ptr = struct.pAllocParms if struct.pAllocParms else None
    # Root client: only its handle is recorded, no object is tracked.
    if struct.hClass == nv_gpu.NV01_ROOT_CLIENT: self.root_handle = struct.hObjectNew = self._alloc_handle()
    elif struct.hClass == nv_gpu.NV01_DEVICE_0:
      params:Any = nv_gpu.NV0080_ALLOC_PARAMETERS.from_address(params_ptr)
      assert params.hClientShare == self.root_handle
      struct.hObjectNew = self._alloc_handle()
      self.object_by_handle[struct.hObjectNew] = self.gpus[params.deviceId]
    elif struct.hClass == nv_gpu.NV20_SUBDEVICE_0:
      # Each branch below validates that the parent handle refers to the expected object kind.
      assert struct.hObjectParent in self.object_by_handle and isinstance(self.object_by_handle[struct.hObjectParent], NVGPU)
      struct.hObjectNew = self._alloc_handle()
      self.object_by_handle[struct.hObjectNew] = NVSubDevice(self.object_by_handle[struct.hObjectParent])
    elif struct.hClass == nv_gpu.TURING_USERMODE_A:
      assert struct.hObjectParent in self.object_by_handle and isinstance(self.object_by_handle[struct.hObjectParent], NVSubDevice)
      struct.hObjectNew = self._alloc_handle()
      self.object_by_handle[struct.hObjectNew] = NVUserMode(self.object_by_handle[struct.hObjectParent])
    elif struct.hClass == nv_gpu.FERMI_VASPACE_A:
      assert struct.hObjectParent in self.object_by_handle and isinstance(self.object_by_handle[struct.hObjectParent], NVGPU)
      struct.hObjectNew = self._alloc_handle()
      self.object_by_handle[struct.hObjectNew] = NVVASpace(self.object_by_handle[struct.hObjectParent])
    elif struct.hClass == nv_gpu.NV1_MEMORY_SYSTEM or struct.hClass == nv_gpu.NV1_MEMORY_USER:
      assert struct.hObjectParent in self.object_by_handle and isinstance(self.object_by_handle[struct.hObjectParent], NVGPU)
      params = nv_gpu.NV_MEMORY_ALLOCATION_PARAMS.from_address(params_ptr)
      struct.hObjectNew = self._alloc_handle()
      self.object_by_handle[struct.hObjectNew] = NVAllocation(self.object_by_handle[struct.hObjectParent], params.size)
    elif struct.hClass == nv_gpu.KEPLER_CHANNEL_GROUP_A:
      assert struct.hObjectParent in self.object_by_handle and isinstance(self.object_by_handle[struct.hObjectParent], NVGPU)
      struct.hObjectNew = self._alloc_handle()
      self.object_by_handle[struct.hObjectNew] = NVChannelGroup(self.object_by_handle[struct.hObjectParent])
    elif struct.hClass == nv_gpu.FERMI_CONTEXT_SHARE_A:
      assert struct.hObjectParent in self.object_by_handle and isinstance(self.object_by_handle[struct.hObjectParent], NVChannelGroup)
      struct.hObjectNew = self._alloc_handle()
      self.object_by_handle[struct.hObjectNew] = NVContextShare(self.object_by_handle[struct.hObjectParent])
    elif struct.hClass == nv_gpu.AMPERE_CHANNEL_GPFIFO_A:
      assert struct.hObjectParent in self.object_by_handle and isinstance(self.object_by_handle[struct.hObjectParent], NVChannelGroup)
      struct.hObjectNew = self._alloc_handle()
      params = nv_gpu.NV_CHANNELGPFIFO_ALLOCATION_PARAMETERS.from_address(params_ptr)
      gpu = self.object_by_handle[struct.hObjectParent].device
      # Register the ring with the GPU; the returned token is what userspace
      # later writes to the doorbell to kick this queue.
      gpfifo_token = gpu.add_gpfifo(params.gpFifoOffset, params.gpFifoEntries)
      self.object_by_handle[struct.hObjectNew] = NVGPFIFO(gpu, gpfifo_token)
    elif struct.hClass == nv_gpu.AMPERE_DMA_COPY_B or struct.hClass == nv_gpu.ADA_COMPUTE_A:
      # Engine objects need no emulated state; just validate the parent GPFIFO.
      assert struct.hObjectParent in self.object_by_handle and isinstance(self.object_by_handle[struct.hObjectParent], NVGPFIFO)
    else: raise RuntimeError(f"Unknown {struct.hClass} to rm_alloc")
    return 0
  def rm_control(self, argp):
    """Emulate NV_ESC_RM_CONTROL: fill in the result params for the known cmds. Returns 0."""
    struct = nv_gpu.NVOS54_PARAMETERS.from_address(argp)  # NOTE(review): shadows the stdlib struct module (module unused in this class)
    params_ptr = struct.params if struct.params else None
    if struct.cmd == nv_gpu.NV0000_CTRL_CMD_GPU_GET_ID_INFO_V2:
      params:Any = nv_gpu.NV0000_CTRL_GPU_GET_ID_INFO_V2_PARAMS.from_address(params_ptr)
      params.deviceInstance = params.gpuId # emulate them to be the same
    elif struct.cmd == nv_gpu.NV0080_CTRL_CMD_GPU_GET_CLASSLIST_V2:
      params = nv_gpu.NV0080_CTRL_GPU_GET_CLASSLIST_V2_PARAMS.from_address(params_ptr)
      # Hardcoded class list captured from a real AD102 board.
      classes = [50021, 51607, 51648, 50543, 51125, 51125, 51125, 51125, 50529, 36967, 36909, 37105, 33868, 36978, 37095, 37094, 36980, 37014, 49270,
                 41068, 41088, 41280, 50025, 96, 112, 115, 125, 20608, 20640, 20539, 20540, 41089, 41092, 50034, 50810, 50811, 50814, 51056, 51057,
                 51059, 51069, 51071, 51632, 51639, 51639, 51706, 52019, 222, 50287, 50273, 50031, 50017] # from ada102
      params.numClasses = len(classes)
      for i,c in enumerate(classes): params.classList[i] = c
    elif struct.cmd == nv_gpu.NV2080_CTRL_CMD_GR_GET_INFO:
      # Only SM version is emulated; any other requested index raises KeyError.
      info = {nv_gpu.NV2080_CTRL_GR_INFO_INDEX_SM_VERSION: nv_gpu.NV2080_CTRL_GR_INFO_SM_VERSION_3_5}
      params = nv_gpu.NV2080_CTRL_GR_GET_INFO_PARAMS.from_address(params_ptr)
      reqlist = (nv_gpu.NV2080_CTRL_GR_INFO * params.grInfoListSize).from_address(params.grInfoList)
      for i in range(params.grInfoListSize): reqlist[i].data = info[reqlist[i].index]
    elif struct.cmd == nv_gpu.NV2080_CTRL_CMD_GPU_GET_GID_INFO:
      assert struct.hObject in self.object_by_handle and isinstance(self.object_by_handle[struct.hObject], NVSubDevice)
      gpu = self.object_by_handle[struct.hObject].device
      params = nv_gpu.NV2080_CTRL_GPU_GET_GID_INFO_PARAMS.from_address(params_ptr)
      if params.flags != nv_gpu.NV2080_GPU_CMD_GPU_GET_GID_FLAGS_FORMAT_BINARY: raise RuntimeError(f"Unknown format")
      bts = gpu.gpu_uuid(sz=params.length)
      for i in range(params.length): params.data[i] = bts[i]
    elif struct.cmd == nv_gpu.NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN:
      assert struct.hObject in self.object_by_handle and isinstance(self.object_by_handle[struct.hObject], NVGPFIFO)
      params = nv_gpu.NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN_PARAMS.from_address(params_ptr)
      gpu_fifo = self.object_by_handle[struct.hObject]
      params.workSubmitToken = gpu_fifo.token
    elif struct.cmd == nv_gpu.NVA06C_CTRL_CMD_GPFIFO_SCHEDULE: pass  # scheduling is a no-op in the mock
    elif struct.cmd == nv_gpu.NV2080_CTRL_CMD_PERF_BOOST: pass  # perf boost is a no-op in the mock
    else: raise RuntimeError(f"Unknown {struct.cmd} to rm_control")
    return 0
  def ctl_ioctl(self, req, argp):
    """Dispatch a /dev/nvidiactl ioctl; nr is the low byte of the request code."""
    nr = req & 0xff
    if nr == nv_gpu.NV_ESC_RM_ALLOC: return self.rm_alloc(argp)
    elif nr == nv_gpu.NV_ESC_RM_ALLOC_MEMORY: pass
    elif nr == nv_gpu.NV_ESC_RM_CONTROL: return self.rm_control(argp)
    elif nr == nv_gpu.NV_ESC_RM_MAP_MEMORY:
      st:Any = nv_gpu.nv_ioctl_nvos33_parameters_with_fd.from_address(argp)
      obj = self.object_by_handle[st.params.hMemory]
      if isinstance(obj, NVUserMode):
        # Flag the device fd so its next mmap is tracked as the doorbell region.
        file = self.opened_fds[st.fd]
        assert isinstance(file, NVDevFileDesc)
        file._mapping_userland = True
    elif nr == nv_gpu.NV_ESC_RM_FREE:
      st = nv_gpu.NVOS00_PARAMETERS.from_address(argp)
      self.object_by_handle.pop(st.hObjectOld)
    elif nr == nv_gpu.NV_ESC_CARD_INFO:
      # argp points at an array of card-info structs, one per GPU.
      for i,gpu in enumerate(self.gpus.values()):
        st = nv_gpu.nv_ioctl_card_info_t.from_address(argp + i * ctypes.sizeof(nv_gpu.nv_ioctl_card_info_t))
        st.gpu_id = gpu.gpuid
        st.pci_info.device_id = 0x2684  # report an RTX 4090 PCI device id
        st.valid = True
    else: raise RuntimeError(f"Unknown {nr} to nvidiactl")
    return 0
  def uvm_ioctl(self, nr, argp):
    """Dispatch a /dev/nvidia-uvm ioctl. Returns 0 on success, -1 on unknown GPU uuid."""
    if nr == nv_gpu.UVM_INITIALIZE: pass
    elif nr == nv_gpu.UVM_MM_INITIALIZE: pass
    elif nr == nv_gpu.UVM_REGISTER_GPU:
      # Validate the uuid belongs to one of our emulated GPUs.
      st:Any = nv_gpu.UVM_REGISTER_GPU_PARAMS.from_address(argp)
      assert any(all(st.gpu_uuid.uuid[i] == gpu.gpu_uuid()[i] for i in range(16)) for gpu in self.gpus.values())
    elif nr == nv_gpu.UVM_REGISTER_GPU_VASPACE: pass
    elif nr == nv_gpu.UVM_ENABLE_PEER_ACCESS: pass # uvm and shared spaced are setup already, no emulation for now
    elif nr == nv_gpu.UVM_CREATE_EXTERNAL_RANGE:
      # Reserve the VA range with an anonymous host mapping at the fixed base.
      st = nv_gpu.UVM_CREATE_EXTERNAL_RANGE_PARAMS.from_address(argp)
      libc.mmap(st.base, st.length, mmap.PROT_READ|mmap.PROT_WRITE, MAP_FIXED|mmap.MAP_SHARED|mmap.MAP_ANONYMOUS, -1, 0)
    elif nr == nv_gpu.UVM_MAP_EXTERNAL_ALLOCATION:
      st = nv_gpu.UVM_MAP_EXTERNAL_ALLOCATION_PARAMS.from_address(argp)
      for gpu_attr_id in range(st.gpuAttributesCount):
        # Match each per-GPU attribute's uuid against the emulated GPUs.
        gpu = None
        for _gpu in self.gpus.values():
          if all(st.perGpuAttributes[gpu_attr_id].gpuUuid.uuid[i] == _gpu.gpu_uuid()[i] for i in range(16)):
            gpu = _gpu
            break
        if gpu is None: return -1
        gpu.map_range(st.base, st.length)
    elif nr == nv_gpu.UVM_REGISTER_CHANNEL: pass
    elif nr == nv_gpu.UVM_FREE:
      st = nv_gpu.UVM_FREE_PARAMS.from_address(argp)
      libc.munmap(st.base, st.length)
    else: raise RuntimeError(f"Unknown {nr} to nvidia-uvm")
    return 0
  def dev_ioctl(self, dev, req, argp): return 0  # per-GPU node ioctls need no emulation
  def _gpu_mmio_write(self, mv, off, gpu):
    """Doorbell-write hook: run every queue with pending entries to completion.

    NOTE(review): the `gpu` parameter is immediately shadowed by the loop below,
    so ALL gpus' queues are drained regardless of which doorbell was written —
    confirm this is intentional. `prev_rptr` is bound but unused.
    """
    any_progress = True
    # Keep sweeping until a full pass over every queue makes no progress,
    # since executing one queue can enqueue work on another.
    while any_progress:
      any_progress = False
      for gpu in self.gpus.values():
        for q in gpu.queues:
          if (prev_rptr:=q.ctrl.GPGet) != q.ctrl.GPPut:
            any_progress |= q.execute()