// f16_to_f32.js (2.6 KB)

  /**
   * WGSL compute shader that expands packed IEEE-754 binary16 values to f32.
   *
   * Bindings (group 0):
   *   - binding 0, `data0`: array<u32>; every word holds two half floats.
   *   - binding 1, `data1`: array<f32>; receives two floats per input word.
   *
   * One invocation handles one input word (workgroup size 256). For word `i`
   * the low 16 bits are written to `data1[2 * i]` and the high 16 bits to
   * `data1[2 * i + 1]`. Invocations past the end of `data0` return early.
   *
   * The conversion assembles the f32 bit pattern with integer operations, so
   * it does not depend on the accuracy of any transcendental builtin and it
   * maps the all-ones exponent (infinity / NaN) to its f32 counterpart.
   */
  const f16tof32 = `
    fn f16_bits_to_f32(h: u32) -> f32 {
      // Move the sign from bit 15 to bit 31.
      let sign_bit = (h & 0x8000u) << 16u;
      let exponent = (h >> 10u) & 0x1Fu;
      let fraction = h & 0x3FFu;
      if (exponent == 0u) {
        // Zero or subnormal: the value is fraction * 2^-24, which is exact in f32.
        // The sign is OR-ed in as bits so that negative zero is preserved.
        let magnitude = f32(fraction) * 5.9604644775390625e-8;
        return bitcast<f32>(sign_bit | bitcast<u32>(magnitude));
      }
      // Exponent 0x1F marks infinity / NaN and becomes the f32 all-ones exponent;
      // every other exponent is re-biased from 15 to 127 (127 - 15 = 112).
      let exponent32 = select(exponent + 112u, 0xFFu, exponent == 0x1Fu);
      // The 10 fraction bits are left-aligned inside the 23-bit f32 fraction.
      return bitcast<f32>(sign_bit | (exponent32 << 23u) | (fraction << 13u));
    }
    @group(0) @binding(0) var<storage,read_write> data0: array<u32>;
    @group(0) @binding(1) var<storage,read_write> data1: array<f32>;
    @compute @workgroup_size(256) fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
      let gidx = gid.x;
      // The dispatch is rounded up to whole workgroups; skip the surplus invocations.
      if (gidx >= arrayLength(&data0)) {
        return;
      }
      let packed = data0[gidx];
      let outgidx = gidx * 2u;
      data1[outgidx] = f16_bits_to_f32(packed & 0xFFFFu);
      data1[outgidx + 1u] = f16_bits_to_f32(packed >> 16u);
    }`;
  /**
   * Converts a block of IEEE-754 binary16 values to float32 on the GPU.
   *
   * The input bytes are uploaded through a mappable staging buffer, expanded by
   * the `f16tof32` compute shader (one invocation per 32-bit word, 256
   * invocations per workgroup) and read back through a second staging buffer.
   *
   * @param {GPUDevice} device WebGPU device used to create the buffers and the
   *   pipeline and to submit the work.
   * @param {ArrayBufferView} inf16 View over the raw half-float bytes, two
   *   bytes per value (typically a Uint8Array). Its byte offset does not need
   *   to be aligned, and the number of values may be odd.
   * @returns {Promise<Float32Array>} One float per input half value, in input
   *   order. The array is a copy and does not alias GPU memory.
   * @throws {RangeError} If the input has an odd number of bytes, or is too
   *   large to be covered by a single one-dimensional dispatch on `device`.
   */
  window.f16tof32GPU = async (device, inf16) => {
    const byteLength = inf16.byteLength;
    if (byteLength % 2 !== 0) {
      throw new RangeError(`f16tof32GPU: input is ${byteLength} bytes, expected a multiple of 2`);
    }
    const halfCount = byteLength / 2;
    if (halfCount === 0) {
      // Nothing to convert; also avoids binding zero-sized buffers.
      return new Float32Array(0);
    }

    // The shader consumes whole u32 words and buffer copies / mappings must be
    // multiples of 4 bytes, so round the input up to a full word. New buffers
    // are zero-initialised, so the padding decodes to 0 and is trimmed below.
    const paddedByteLength = Math.ceil(byteLength / 4) * 4;
    const outputByteLength = paddedByteLength * 2;
    const workgroupCount = Math.ceil(paddedByteLength / (4 * 256));
    if (workgroupCount > device.limits.maxComputeWorkgroupsPerDimension) {
      // An oversized dispatch only raises an uncaptured validation error and
      // would leave the output untouched, so fail loudly instead.
      throw new RangeError(`f16tof32GPU: ${byteLength} bytes need ${workgroupCount} workgroups, above the device limit`);
    }

    // Every buffer is tracked so that it can be released on all exit paths.
    const buffers = [];
    const createBuffer = (size, usage) => {
      const buffer = device.createBuffer({ size, usage });
      buffers.push(buffer);
      return buffer;
    };

    try {
      const storageUsage = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST;
      const input = createBuffer(paddedByteLength, storageUsage);
      const output = createBuffer(outputByteLength, storageUsage);
      const gpuWriteBuffer = createBuffer(paddedByteLength, GPUBufferUsage.COPY_SRC | GPUBufferUsage.MAP_WRITE);
      const gpuReadBuffer = createBuffer(outputByteLength, GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ);
      const commandEncoder = device.createCommandEncoder();

      // Upload byte-wise: a byte view has no alignment requirement on the
      // source offset, unlike a Uint32Array view over the caller's buffer.
      await gpuWriteBuffer.mapAsync(GPUMapMode.WRITE);
      const sourceBytes = new Uint8Array(inf16.buffer, inf16.byteOffset, byteLength);
      new Uint8Array(gpuWriteBuffer.getMappedRange()).set(sourceBytes);
      gpuWriteBuffer.unmap();
      commandEncoder.copyBufferToBuffer(gpuWriteBuffer, 0, input, 0, paddedByteLength);

      const pipeline = await device.createComputePipelineAsync({
        layout: "auto",
        compute: { module: device.createShaderModule({ code: f16tof32 }), entryPoint: "main" },
      });
      addComputePass(device, commandEncoder, pipeline, [input, output], [workgroupCount, 1, 1]);
      commandEncoder.copyBufferToBuffer(output, 0, gpuReadBuffer, 0, outputByteLength);
      device.queue.submit([commandEncoder.finish()]);

      await gpuReadBuffer.mapAsync(GPUMapMode.READ);
      // slice() copies out of the mapping, which is detached by unmap(); only
      // the real values are kept, dropping the value produced by the padding.
      const resultBuffer = new Float32Array(gpuReadBuffer.getMappedRange(), 0, halfCount).slice();
      gpuReadBuffer.unmap();
      return resultBuffer;
    } finally {
      // Release GPU memory eagerly instead of waiting for garbage collection.
      for (const buffer of buffers) {
        buffer.destroy();
      }
    }
  };