model_eval.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. import time
  2. start = time.perf_counter()
  3. from pathlib import Path
  4. import numpy as np
  5. from tinygrad import Tensor, Device, dtypes, GlobalCounters, TinyJit
  6. from tinygrad.nn.state import get_parameters, load_state_dict, safe_load
  7. from tinygrad.helpers import getenv
  8. def tlog(x): print(f"{x:25s} @ {time.perf_counter()-start:5.2f}s")
  9. def eval_resnet():
  10. Tensor.no_grad = True
  11. # Resnet50-v1.5
  12. from extra.models.resnet import ResNet50
  13. tlog("imports")
  14. GPUS = [f'{Device.DEFAULT}:{i}' for i in range(getenv("GPUS", 6))]
  15. for x in GPUS: Device[x]
  16. tlog("got devices") # NOTE: this is faster with rocm-smi running
  17. class ResnetRunner:
  18. def __init__(self, device=None):
  19. self.mdl = ResNet50()
  20. for x in get_parameters(self.mdl) if device else []: x.to_(device)
  21. if (fn:=getenv("RESNET_MODEL", "")): load_state_dict(self.mdl, safe_load(fn))
  22. else: self.mdl.load_from_pretrained()
  23. self.input_mean = Tensor([0.485, 0.456, 0.406], device=device).reshape(1, -1, 1, 1)
  24. self.input_std = Tensor([0.229, 0.224, 0.225], device=device).reshape(1, -1, 1, 1)
  25. def __call__(self, x:Tensor) -> Tensor:
  26. x = x.permute([0,3,1,2]).cast(dtypes.float32) / 255.0
  27. x -= self.input_mean
  28. x /= self.input_std
  29. return self.mdl(x).log_softmax().argmax(axis=1).realize()
  30. mdl = TinyJit(ResnetRunner(GPUS))
  31. tlog("loaded models")
  32. # evaluation on the mlperf classes of the validation set from imagenet
  33. from examples.mlperf.dataloader import batch_load_resnet
  34. iterator = batch_load_resnet(getenv("BS", 128*6), val=getenv("VAL", 1), shuffle=False, pad_first_batch=True)
  35. def data_get():
  36. x,y,cookie = next(iterator)
  37. return x.shard(GPUS, axis=0).realize(), y, cookie
  38. n,d = 0,0
  39. proc = data_get()
  40. tlog("loaded initial data")
  41. st = time.perf_counter()
  42. while proc is not None:
  43. GlobalCounters.reset()
  44. proc = (mdl(proc[0]), proc[1], proc[2]) # this frees the images
  45. run = time.perf_counter()
  46. # load the next data here
  47. try: next_proc = data_get()
  48. except StopIteration: next_proc = None
  49. nd = time.perf_counter()
  50. y = np.array(proc[1])
  51. proc = (proc[0].numpy() == y) & (y != -1) # this realizes the models and frees the cookies
  52. n += proc.sum()
  53. d += (y != -1).sum()
  54. et = time.perf_counter()
  55. tlog(f"****** {n:5d}/{d:5d} {n*100.0/d:.2f}% -- {(run-st)*1000:7.2f} ms to enqueue, {(et-run)*1000:7.2f} ms to realize ({(nd-run)*1000:7.2f} ms fetching). {(len(proc))/(et-st):8.2f} examples/sec. {GlobalCounters.global_ops*1e-12/(et-st):5.2f} TFLOPS")
  56. st = et
  57. proc, next_proc = next_proc, None
  58. tlog("done")
  59. def eval_unet3d():
  60. # UNet3D
  61. from extra.models.unet3d import UNet3D
  62. from extra.datasets.kits19 import iterate, sliding_window_inference, get_val_files
  63. from examples.mlperf.metrics import dice_score
  64. mdl = UNet3D()
  65. mdl.load_from_pretrained()
  66. s = 0
  67. st = time.perf_counter()
  68. for i, (image, label) in enumerate(iterate(get_val_files()), start=1):
  69. mt = time.perf_counter()
  70. pred, label = sliding_window_inference(mdl, image, label)
  71. et = time.perf_counter()
  72. print(f"{(mt-st)*1000:.2f} ms loading data, {(et-mt)*1000:.2f} ms to run model")
  73. s += dice_score(Tensor(pred), Tensor(label)).mean().item()
  74. print(f"****** {s:.2f}/{i} {s/i:.5f} Mean DICE score")
  75. st = time.perf_counter()
  76. def eval_retinanet():
  77. # RetinaNet with ResNeXt50_32X4D
  78. from extra.models.resnet import ResNeXt50_32X4D
  79. from extra.models.retinanet import RetinaNet
  80. mdl = RetinaNet(ResNeXt50_32X4D())
  81. mdl.load_from_pretrained()
  82. input_mean = Tensor([0.485, 0.456, 0.406]).reshape(1, -1, 1, 1)
  83. input_std = Tensor([0.229, 0.224, 0.225]).reshape(1, -1, 1, 1)
  84. def input_fixup(x):
  85. x = x.permute([0,3,1,2]) / 255.0
  86. x -= input_mean
  87. x /= input_std
  88. return x
  89. from extra.datasets.openimages import openimages, iterate
  90. from pycocotools.coco import COCO
  91. from pycocotools.cocoeval import COCOeval
  92. from contextlib import redirect_stdout
  93. coco = COCO(openimages('validation'))
  94. coco_eval = COCOeval(coco, iouType="bbox")
  95. coco_evalimgs, evaluated_imgs, ncats, narea = [], [], len(coco_eval.params.catIds), len(coco_eval.params.areaRng)
  96. from tinygrad.engine.jit import TinyJit
  97. mdlrun = TinyJit(lambda x: mdl(input_fixup(x)).realize())
  98. n, bs = 0, 8
  99. st = time.perf_counter()
  100. for x, targets in iterate(coco, bs):
  101. dat = Tensor(x.astype(np.float32))
  102. mt = time.perf_counter()
  103. if dat.shape[0] == bs:
  104. outs = mdlrun(dat).numpy()
  105. else:
  106. mdlrun.jit_cache = None
  107. outs = mdl(input_fixup(dat)).numpy()
  108. et = time.perf_counter()
  109. predictions = mdl.postprocess_detections(outs, input_size=dat.shape[1:3], orig_image_sizes=[t["image_size"] for t in targets])
  110. ext = time.perf_counter()
  111. n += len(targets)
  112. print(f"[{n}/{len(coco.imgs)}] == {(mt-st)*1000:.2f} ms loading data, {(et-mt)*1000:.2f} ms to run model, {(ext-et)*1000:.2f} ms for postprocessing")
  113. img_ids = [t["image_id"] for t in targets]
  114. coco_results = [{"image_id": targets[i]["image_id"], "category_id": label, "bbox": box.tolist(), "score": score}
  115. for i, prediction in enumerate(predictions) for box, score, label in zip(*prediction.values())]
  116. with redirect_stdout(None):
  117. coco_eval.cocoDt = coco.loadRes(coco_results)
  118. coco_eval.params.imgIds = img_ids
  119. coco_eval.evaluate()
  120. evaluated_imgs.extend(img_ids)
  121. coco_evalimgs.append(np.array(coco_eval.evalImgs).reshape(ncats, narea, len(img_ids)))
  122. st = time.perf_counter()
  123. coco_eval.params.imgIds = evaluated_imgs
  124. coco_eval._paramsEval.imgIds = evaluated_imgs
  125. coco_eval.evalImgs = list(np.concatenate(coco_evalimgs, -1).flatten())
  126. coco_eval.accumulate()
  127. coco_eval.summarize()
  128. def eval_rnnt():
  129. # RNN-T
  130. from extra.models.rnnt import RNNT
  131. mdl = RNNT()
  132. mdl.load_from_pretrained()
  133. from extra.datasets.librispeech import iterate
  134. from examples.mlperf.metrics import word_error_rate
  135. LABELS = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"]
  136. c = 0
  137. scores = 0
  138. words = 0
  139. st = time.perf_counter()
  140. for X, Y in iterate():
  141. mt = time.perf_counter()
  142. tt = mdl.decode(Tensor(X[0]), Tensor([X[1]]))
  143. et = time.perf_counter()
  144. print(f"{(mt-st)*1000:.2f} ms loading data, {(et-mt)*1000:.2f} ms to run model")
  145. for n, t in enumerate(tt):
  146. tnp = np.array(t)
  147. _, scores_, words_ = word_error_rate(["".join([LABELS[int(tnp[i])] for i in range(tnp.shape[0])])], [Y[n]])
  148. scores += scores_
  149. words += words_
  150. c += len(tt)
  151. print(f"WER: {scores/words}, {words} words, raw scores: {scores}, c: {c}")
  152. st = time.perf_counter()
  153. def eval_bert():
  154. # Bert-QA
  155. from extra.models.bert import BertForQuestionAnswering
  156. mdl = BertForQuestionAnswering()
  157. mdl.load_from_pretrained()
  158. @TinyJit
  159. def run(input_ids, input_mask, segment_ids):
  160. return mdl(input_ids, input_mask, segment_ids).realize()
  161. from extra.datasets.squad import iterate
  162. from examples.mlperf.helpers import get_bert_qa_prediction
  163. from examples.mlperf.metrics import f1_score
  164. from transformers import BertTokenizer
  165. tokenizer = BertTokenizer(str(Path(__file__).parents[2] / "weights/bert_vocab.txt"))
  166. c = 0
  167. f1 = 0.0
  168. st = time.perf_counter()
  169. for X, Y in iterate(tokenizer):
  170. mt = time.perf_counter()
  171. outs = []
  172. for x in X:
  173. outs.append(run(Tensor(x["input_ids"]), Tensor(x["input_mask"]), Tensor(x["segment_ids"])).numpy())
  174. et = time.perf_counter()
  175. print(f"{(mt-st)*1000:.2f} ms loading data, {(et-mt)*1000:.2f} ms to run model over {len(X)} features")
  176. pred = get_bert_qa_prediction(X, Y, outs)
  177. print(f"pred: {pred}\nans: {Y['answers']}")
  178. f1 += max([f1_score(pred, ans) for ans in Y["answers"]])
  179. c += 1
  180. print(f"f1: {f1/c}, raw: {f1}, c: {c}\n")
  181. st = time.perf_counter()
  182. def eval_mrcnn():
  183. from tqdm import tqdm
  184. from extra.models.mask_rcnn import MaskRCNN
  185. from extra.models.resnet import ResNet
  186. from extra.datasets.coco import BASEDIR, images, convert_prediction_to_coco_bbox, convert_prediction_to_coco_mask, accumulate_predictions_for_coco, evaluate_predictions_on_coco, iterate
  187. from examples.mask_rcnn import compute_prediction_batched, Image
  188. mdl = MaskRCNN(ResNet(50, num_classes=None, stride_in_1x1=True))
  189. mdl.load_from_pretrained()
  190. bbox_output = '/tmp/results_bbox.json'
  191. mask_output = '/tmp/results_mask.json'
  192. accumulate_predictions_for_coco([], bbox_output, rm=True)
  193. accumulate_predictions_for_coco([], mask_output, rm=True)
  194. #TODO: bs > 1 not as accurate
  195. bs = 1
  196. for batch in tqdm(iterate(images, bs=bs), total=len(images)//bs):
  197. batch_imgs = []
  198. for image_row in batch:
  199. image_name = image_row['file_name']
  200. img = Image.open(BASEDIR/f'val2017/{image_name}').convert("RGB")
  201. batch_imgs.append(img)
  202. batch_result = compute_prediction_batched(batch_imgs, mdl)
  203. for image_row, result in zip(batch, batch_result):
  204. image_name = image_row['file_name']
  205. box_pred = convert_prediction_to_coco_bbox(image_name, result)
  206. mask_pred = convert_prediction_to_coco_mask(image_name, result)
  207. accumulate_predictions_for_coco(box_pred, bbox_output)
  208. accumulate_predictions_for_coco(mask_pred, mask_output)
  209. del batch_imgs
  210. del batch_result
  211. evaluate_predictions_on_coco(bbox_output, iou_type='bbox')
  212. evaluate_predictions_on_coco(mask_output, iou_type='segm')
  213. if __name__ == "__main__":
  214. # inference only
  215. Tensor.training = False
  216. Tensor.no_grad = True
  217. models = getenv("MODEL", "resnet,retinanet,unet3d,rnnt,bert,mrcnn").split(",")
  218. for m in models:
  219. nm = f"eval_{m}"
  220. if nm in globals():
  221. print(f"eval {m}")
  222. globals()[nm]()