// Package asyncbuffer provides an asynchronous buffer that reads data from an
// io.Reader in the background.
//
// When created, AsyncBuffer starts reading from the upstream reader in the
// background. If a read error occurs, it is stored and can be checked with
// AsyncBuffer.Error().
//
// When reading through AsyncBuffer.Reader().Read(), the error is only returned
// once the reader reaches the point where the error occurred. In other words,
// errors are delayed until encountered by the reader.
//
// However, AsyncBuffer.Close() and AsyncBuffer.Error() will immediately return
// any stored error, even if the reader has not yet reached the error point.
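//
// A minimal usage sketch (src and srcLen are assumptions, not part of this
// package):
//
//	ab := asyncbuffer.New(src, srcLen)
//	defer ab.Close()
//
//	head := make([]byte, 16)
//	if _, err := io.ReadFull(ab.Reader(), head); err != nil {
//		// handle the (possibly delayed) read error
//	}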
package asyncbuffer

import (
	"context"
	"errors"
	"io"
	"log/slog"
	"sync"
	"sync/atomic"

	"github.com/imgproxy/imgproxy/v3/ierrors"
	"github.com/imgproxy/imgproxy/v3/ioutil"
)

const (
	// chunkSize is the size of each chunk in bytes
	chunkSize = 4096

	// pauseThreshold is the amount of data that is always read into memory.
	// Data beyond the threshold is read only if accessed. If the threshold is
	// not a multiple of chunkSize, the last chunk it points to is read in full.
	pauseThreshold = 32768 // 32 KiB
)

// byteChunk is a struct that holds a buffer and the data read from the upstream reader.
// The data slice is required since a chunk read may be smaller than chunkSize.
type byteChunk struct {
	buf  []byte
	data []byte
}

// chunkPool is a global sync.Pool that holds byteChunk objects for
// all readers
var chunkPool = sync.Pool{
	New: func() any {
		buf := make([]byte, chunkSize)
		return &byteChunk{
			buf:  buf,
			data: buf[:0],
		}
	},
}

// AsyncBuffer is a wrapper around io.Reader that reads data in chunks
// in the background and allows reading from it synchronously.
type AsyncBuffer struct {
	r          io.ReadCloser // Upstream reader
	dataLen    int           // Expected length of the data in r, <= 0 means unknown length
	chunks     []*byteChunk  // References to the chunks read from the upstream reader
	mu         sync.RWMutex  // Mutex on chunks slice
	err        atomic.Value  // Error that occurred during reading
	bytesRead  atomic.Int64  // Total length of the data read
	finished   atomic.Bool   // Indicates that the buffer has finished reading
	closed     atomic.Bool   // Indicates that the buffer was closed
	paused     *Latch        // Paused buffer does not read data beyond threshold
	chunkCond  *Cond         // Ticker that signals when a new chunk is ready
	finishOnce sync.Once
	finishFn   []context.CancelFunc
}

// New creates a new AsyncBuffer that reads from the given io.ReadCloser in the
// background and closes it when finished.
//
// r - io.ReadCloser to read data from
// dataLen - expected length of the data in r, <= 0 means unknown length
// finishFn - optional functions to call when the buffer is finished reading
func New(r io.ReadCloser, dataLen int, finishFn ...context.CancelFunc) *AsyncBuffer {
	ab := &AsyncBuffer{
		r:         r,
		dataLen:   dataLen,
		paused:    NewLatch(),
		chunkCond: NewCond(),
		finishFn:  finishFn,
	}

	go ab.readChunks()

	return ab
}

// NewReadFull creates a new AsyncBuffer that reads from the given io.ReadCloser
// in the foreground, blocking until all data is read. If reading fails, the
// reader is closed, resources are released immediately, and the error is returned.
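//
// A minimal sketch (f and size are assumptions, not part of this package):
//
//	ab, err := asyncbuffer.NewReadFull(f, size)
//	if err != nil {
//		return err
//	}
//	defer ab.Close()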
func NewReadFull(r io.ReadCloser, dataLen int, finishFn ...context.CancelFunc) (*AsyncBuffer, error) {
	ab := &AsyncBuffer{
		r:         r,
		dataLen:   dataLen,
		paused:    NewLatch(),
		chunkCond: NewCond(),
		finishFn:  finishFn,
	}

	// Release the paused latch so that the reader can read all data immediately
	ab.paused.Release()

	// Read all data in foreground
	ab.readChunks()

	// If error occurred during reading, return it
	if ab.Error() != nil {
		ab.Close() // Reader should be closed and resources released
		return nil, ab.Error()
	}

	return ab, nil
}

// callFinishFn calls the finish functions registered with the AsyncBuffer.
func (ab *AsyncBuffer) callFinishFn() {
	ab.finishOnce.Do(func() {
		for _, fn := range ab.finishFn {
			if fn != nil {
				fn()
			}
		}
	})
}

func (ab *AsyncBuffer) setErr(err error) {
	if err == nil {
		return
	}

	// If the error is already set, we do not overwrite it
	if ab.err.Load() == nil {
		ab.err.Store(ierrors.Wrap(err, 1))
	}
}

// addChunk adds a new chunk to the AsyncBuffer, increments bytesRead
// and signals that a chunk is ready
func (ab *AsyncBuffer) addChunk(chunk *byteChunk) {
	ab.mu.Lock()
	defer ab.mu.Unlock()

	if ab.closed.Load() {
		// If the reader is closed, we return the chunk to the pool
		chunkPool.Put(chunk)
		return
	}

	// Store the chunk, increase the total number of bytes read,
	// and signal that a new chunk is ready
	ab.chunks = append(ab.chunks, chunk)
	ab.bytesRead.Add(int64(len(chunk.data)))
	ab.chunkCond.Tick()
}

// readChunks reads data from the upstream reader in the background and stores
// it in chunks
func (ab *AsyncBuffer) readChunks() {
	defer func() {
		if ab.bytesRead.Load() < int64(ab.dataLen) {
			// If the reader has finished reading and we have not read enough data,
			// set err to io.ErrUnexpectedEOF
			ab.setErr(io.ErrUnexpectedEOF)
		}

		// Indicate that the reader has finished reading
		ab.finished.Store(true)
		ab.chunkCond.Close()

		// Close the upstream reader
		if err := ab.r.Close(); err != nil {
			slog.Warn(
				"error closing upstream reader",
				"error", err,
				"source", "asyncbuffer.AsyncBuffer.readChunks",
			)
		}

		ab.callFinishFn()
	}()

	r := ab.r.(io.Reader)

	if ab.dataLen > 0 {
		// If the data length is known, we read only that much data
		r = io.LimitReader(r, int64(ab.dataLen))
	}

	// Stop reading if the reader is closed
	for !ab.closed.Load() {
		// In case we are trying to read data beyond the threshold and we are paused,
		// wait for the pause to be released.
		if ab.bytesRead.Load() >= pauseThreshold {
			ab.paused.Wait()

			// If the reader has been closed while waiting, we can stop reading
			if ab.closed.Load() {
				return // No more data to read
			}
		}

		// Get a chunk from the pool.
		// If the pool is empty, it will create a new byteChunk of chunkSize bytes.
		chunk, ok := chunkPool.Get().(*byteChunk)
		if !ok {
			ab.setErr(errors.New("asyncbuffer.AsyncBuffer.readChunks: failed to get chunk from pool"))
			return
		}

		// Read data into the chunk's buffer.
		// There is no way to guarantee that r.Read will abort on context cancellation;
		// unfortunately, that's how Go works.
		n, err := ioutil.TryReadFull(r, chunk.buf)

		// If it's not EOF, we need to store the error
		if err != nil && err != io.EOF {
			ab.setErr(err)
			chunkPool.Put(chunk)
			return
		}

		// No bytes were read (n == 0), so we can return the chunk to the pool
		if n == 0 {
			chunkPool.Put(chunk)
			return
		}

		// Resize the chunk's data slice to the number of bytes read
		chunk.data = chunk.buf[:n]

		// Store the reference to the chunk in the AsyncBuffer
		ab.addChunk(chunk)

		// EOF at this point means that some bytes were read, but this is the
		// end of the stream, so we can stop reading
		if err == io.EOF {
			return
		}
	}
}

// closedError returns an error if an attempt to read from a closed reader was made.
// If the reader had an error, it returns that error instead.
func (ab *AsyncBuffer) closedError() error {
	// If the reader is not closed, there is nothing to report
	if !ab.closed.Load() {
		return nil
	}

	err := ab.Error()
	if err == nil {
		err = errors.New("asyncbuffer.AsyncBuffer.ReadAt: attempt to read on closed reader")
	}

	return err
}

// offsetAvailable checks if the data at the given offset is available for reading.
// It may return io.EOF if the reader has finished reading and the offset is beyond the end of the stream.
func (ab *AsyncBuffer) offsetAvailable(off int64) (bool, error) {
	// We cannot read any data from a closed reader
	if err := ab.closedError(); err != nil {
		return false, err
	}

	// In case the offset falls within the already read chunks, we can return immediately,
	// even if an error occurred later in the stream
	if off < ab.bytesRead.Load() {
		return true, nil
	}

	// In case the reader has finished reading and we have not read enough
	// data yet, return either the error or EOF
	if ab.finished.Load() {
		// If an error has occurred, we need to return it
		if err := ab.Error(); err != nil {
			return false, err
		}

		// Otherwise, it's EOF: the offset is beyond the end of the stream
		return false, io.EOF
	}

	// No available data
	return false, nil
}

// WaitFor waits for the data to be ready at the given offset. nil means ok.
// It guarantees that the chunk at the given offset is ready to be read.
func (ab *AsyncBuffer) WaitFor(off int64) error {
	// In case we are trying to read data which would potentially hit the pause threshold,
	// we need to unpause the reader ASAP.
	if off >= pauseThreshold {
		ab.paused.Release()
	}

	for {
		ok, err := ab.offsetAvailable(off)
		if ok || err != nil {
			return err
		}

		ab.chunkCond.Wait()
	}
}

// Wait waits for the reader to finish reading all data and returns
// the total length of the data read.
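//
// A small usage sketch (assumes ab is an *AsyncBuffer):
//
//	size, err := ab.Wait()
//	if err != nil {
//		return err
//	}
//	// size is the total number of bytes buffered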
func (ab *AsyncBuffer) Wait() (int, error) {
	// Wait runs till the end of the stream: unpause the reader
	ab.paused.Release()

	for {
		// We cannot read data from a closed reader
		if err := ab.closedError(); err != nil {
			return 0, err
		}

		// In case the reader has finished reading, we can return immediately
		if ab.finished.Load() {
			return int(ab.bytesRead.Load()), ab.Error()
		}

		// Block until the next chunk is ready
		ab.chunkCond.Wait()
	}
}

// ReleaseThreshold releases the pause, allowing the buffer to immediately
// read data beyond the pause threshold.
func (ab *AsyncBuffer) ReleaseThreshold() {
	ab.paused.Release()
}

// Error returns the error that occurred while reading data in the background.
func (ab *AsyncBuffer) Error() error {
	err := ab.err.Load()
	if err == nil {
		return nil
	}

	errCast, ok := err.(error)
	if !ok {
		return errors.New("asyncbuffer.AsyncBuffer.Error: failed to get error")
	}

	return errCast
}

// readChunkAt copies data from the chunk at the given absolute offset to the provided slice.
// The chunk must be available when this method is called.
// Returns the number of bytes copied to the slice or 0 if the chunk has no data
// (e.g. the offset is beyond the end of the stream).
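//
// For example, with chunkSize = 4096, offset 10000 falls into chunk
// index 10000/4096 = 2 and starts at byte 10000%4096 = 1808 within that chunk.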
func (ab *AsyncBuffer) readChunkAt(p []byte, off int64) int {
	// If the chunk is not available, we return 0
	if off >= ab.bytesRead.Load() {
		return 0
	}

	ind := off / chunkSize // chunk index
	chunk := ab.chunks[ind]

	startOffset := off % chunkSize // starting offset in the chunk

	// If the offset within the current chunk is beyond the data
	// it holds, we return 0
	if startOffset >= int64(len(chunk.data)) {
		return 0
	}

	// Copy data to the target slice. The number of bytes to copy is limited by the
	// size of the target slice and the size of the data in the chunk.
	return copy(p, chunk.data[startOffset:])
}

// readAt reads data from the AsyncBuffer at the given offset.
//
// Please note that if the pause threshold is hit in the middle of the read,
// the data beyond the threshold may not be available.
//
// If the reader is paused and we try to read data beyond the pause threshold,
// it will wait till something can be returned.
func (ab *AsyncBuffer) readAt(p []byte, off int64) (int, error) {
	size := int64(len(p)) // total size of the data to read

	if off < 0 {
		return 0, errors.New("asyncbuffer.AsyncBuffer.readAt: negative offset")
	}

	// If we plan to hit the threshold while reading, release the paused reader
	if int64(len(p))+off > pauseThreshold {
		ab.paused.Release()
	}

	// Wait for the offset to be available.
	// It may return io.EOF if the offset is beyond the end of the stream.
	err := ab.WaitFor(off)
	if err != nil {
		return 0, err
	}

	// Hold the read lock until the current buffer is read
	ab.mu.RLock()
	defer ab.mu.RUnlock()

	// If the reader is closed, we return an error
	if err := ab.closedError(); err != nil {
		return 0, err
	}

	// Read data from the first chunk
	n := ab.readChunkAt(p, off)
	if n == 0 {
		return 0, io.EOF // Failed to read any data: we tried to read beyond the end of the stream
	}

	size -= int64(n)
	off += int64(n) // Here and below, off always points to the last read byte + 1

	// Now, let's try to read the rest of the data from the next chunks while they are available
	for size > 0 {
		// If data is not available at the given offset, we can return the data read so far.
		ok, err := ab.offsetAvailable(off)
		if !ok {
			if err == io.EOF {
				return n, nil
			}

			return n, err
		}

		// Read data from the next chunk
		nX := ab.readChunkAt(p[n:], off)
		n += nX
		size -= int64(nX)
		off += int64(nX)

		// If we read less than a full chunk (or, for the last chunk, less than
		// the remaining tail), there is no more data available right now
		if int64(nX) < min(size, int64(chunkSize)) {
			return n, nil
		}
	}

	return n, nil
}

// Close closes the AsyncBuffer and releases all resources. It is idempotent.
func (ab *AsyncBuffer) Close() error {
	ab.mu.Lock()
	defer ab.mu.Unlock()

	// If the reader is already closed, we can return immediately
	if ab.closed.Load() {
		return nil
	}

	ab.closed.Store(true)

	// Return all chunks to the pool
	for _, chunk := range ab.chunks {
		chunkPool.Put(chunk)
	}

	// Release the paused latch so that no goroutines are waiting for it
	ab.paused.Release()

	// Finish downloading
	ab.callFinishFn()

	return nil
}

// Reader returns an io.ReadSeeker+io.ReaderAt that can be used to read actual data from the AsyncBuffer.
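//
// A usage sketch (the Reader type is defined elsewhere in this package;
// buf and the offsets are assumptions):
//
//	r := ab.Reader()
//	if _, err := r.Seek(128, io.SeekStart); err != nil {
//		return err
//	}
//	buf := make([]byte, 64)
//	n, err := r.ReadAt(buf, 0)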
func (ab *AsyncBuffer) Reader() *Reader {
	return &Reader{ab: ab, pos: 0}
}