buffer.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  1. // Package asyncbuffer provides an asynchronous buffer that reads data from an
  2. // io.Reader in the background.
  3. //
  4. // When created, AsyncBuffer starts reading from the upstream reader in the
  5. // background. If a read error occurs, it is stored and can be checked with
  6. // AsyncBuffer.Error().
  7. //
  8. // When reading through AsyncBuffer.Reader().Read(), the error is only returned
  9. // once the reader reaches the point where the error occurred. In other words,
  10. // errors are delayed until encountered by the reader.
  11. //
  12. // However, AsyncBuffer.Close() and AsyncBuffer.Error() will immediately return
  13. // any stored error, even if the reader has not yet reached the error point.
  14. package asyncbuffer
  15. import (
  16. "errors"
  17. "fmt"
  18. "io"
  19. "sync"
  20. "sync/atomic"
  21. )
  // Sizing parameters of the asynchronous buffer.
  const (
  	// chunkSize is the number of bytes held by a single chunk.
  	chunkSize = 4 << 10 // 4 KiB

  	// pauseThreshold is the amount of data that is always read into memory.
  	// Anything past it is read only once somebody asks for it. When the value
  	// is not a multiple of chunkSize, the chunk it falls into is still read
  	// in full.
  	pauseThreshold = 32 << 10 // 32 KiB
  )
  // byteChunk pairs a backing buffer with a view of the bytes that were actually
  // read into it. The separate view is needed because a read from upstream may
  // fill less than the whole buffer.
  type byteChunk struct {
  	// buf is the backing storage of the chunk.
  	buf []byte
  	// data is the part of buf that holds valid bytes.
  	data []byte
  }
  36. // chunkPool is a global sync.Pool that holds byteChunk objects for
  37. // all readers
  38. var chunkPool = sync.Pool{
  39. New: func() any {
  40. buf := make([]byte, chunkSize)
  41. return &byteChunk{
  42. buf: buf,
  43. data: buf[:0],
  44. }
  45. },
  46. }
  47. // AsyncBuffer is a wrapper around io.Reader that reads data in chunks
  48. // in background and allows reading from synchronously.
  49. type AsyncBuffer struct {
  50. r io.ReadCloser // Upstream reader
  51. chunks []*byteChunk // References to the chunks read from the upstream reader
  52. err atomic.Value // Error that occurred during reading
  53. finished atomic.Bool // Indicates that the reader has finished reading
  54. len atomic.Int64 // Total length of the data read
  55. closed atomic.Bool // Indicates that the reader was closed
  56. paused *Latch // Paused reader does not read data beyond threshold
  57. mu sync.RWMutex // Mutex on chunks slice
  58. newChunkSignal chan struct{} // Tick-tock channel that indicates that a new chunk is ready
  59. }
  60. // Underlying Reader that provides io.ReadSeeker interface for the actual data reading
  61. // What is the purpose of this Reader?
  62. type Reader struct {
  63. ab *AsyncBuffer
  64. pos int64
  65. }
  66. // FromReadCloser creates a new AsyncBuffer that reads from the given io.Reader in background
  67. func FromReader(r io.ReadCloser) *AsyncBuffer {
  68. ab := &AsyncBuffer{
  69. r: r,
  70. newChunkSignal: make(chan struct{}),
  71. paused: NewLatch(),
  72. }
  73. go ab.readChunks()
  74. return ab
  75. }
  76. // getNewChunkSignal returns the channel that signals when a new chunk is ready
  77. // Lock is required to read the channel, so it is not closed while reading
  78. func (ab *AsyncBuffer) getNewChunkSignal() chan struct{} {
  79. ab.mu.RLock()
  80. defer ab.mu.RUnlock()
  81. return ab.newChunkSignal
  82. }
  83. // addChunk adds a new chunk to the AsyncBuffer, increments len and signals that a chunk is ready
  84. func (ab *AsyncBuffer) addChunk(chunk *byteChunk) {
  85. ab.mu.Lock()
  86. defer ab.mu.Unlock()
  87. if ab.closed.Load() {
  88. // If the reader is closed, we return the chunk to the pool
  89. chunkPool.Put(chunk)
  90. return
  91. }
  92. // Store the chunk, increase chunk size, increase length of the data read
  93. ab.chunks = append(ab.chunks, chunk)
  94. ab.len.Add(int64(len(chunk.data)))
  95. // Signal that a chunk is ready
  96. currSignal := ab.newChunkSignal
  97. ab.newChunkSignal = make(chan struct{})
  98. close(currSignal)
  99. }
  100. // finish marks the reader as finished
  101. func (ab *AsyncBuffer) finish() {
  102. ab.mu.Lock()
  103. defer ab.mu.Unlock()
  104. // Indicate that the reader has finished reading
  105. ab.finished.Store(true)
  106. // This indicates that Close() was called before all the chunks were read, we do not need to close the channel
  107. // since it was closed already.
  108. if !ab.closed.Load() {
  109. close(ab.newChunkSignal)
  110. }
  111. err := ab.r.Close() // Close the upstream reader
  112. if err != nil {
  113. // If there was an error while closing the upstream reader, store it
  114. ab.err.Store(err)
  115. return
  116. }
  117. }
  118. // readChunks reads data from the upstream reader in background and stores them in the pool
  119. func (ab *AsyncBuffer) readChunks() {
  120. defer ab.finish()
  121. // Stop reading if the reader is finished
  122. for !ab.finished.Load() {
  123. // In case we are trying to read data beyond threshold and we are paused,
  124. // wait for pause to be released.
  125. if ab.len.Load() >= pauseThreshold {
  126. ab.paused.Wait()
  127. // If the reader has been closed while waiting, we can stop reading
  128. if ab.finished.Load() {
  129. return // No more data to read
  130. }
  131. }
  132. // Get a chunk from the pool
  133. // If the pool is empty, it will create a new byteChunk with ChunkSize
  134. chunk, ok := chunkPool.Get().(*byteChunk)
  135. if !ok {
  136. ab.err.Store(errors.New("asyncbuffer.AsyncBuffer.readChunks: failed to get chunk from pool"))
  137. return
  138. }
  139. // Read data into the chunk's buffer
  140. n, err := io.ReadFull(ab.r, chunk.buf)
  141. // If it's not the EOF, we need to store the error
  142. if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
  143. ab.err.Store(err)
  144. return
  145. }
  146. // No bytes were read (n == 0), we can return the chunk to the pool
  147. if err == io.EOF || n == 0 {
  148. chunkPool.Put(chunk)
  149. return
  150. }
  151. // Resize the chunk's data slice to the number of bytes read
  152. chunk.data = chunk.buf[:n]
  153. // Store the reference to the chunk in the AsyncBuffer
  154. ab.addChunk(chunk)
  155. // We got ErrUnexpectedEOF meaning that some bytes were read, but this is the
  156. // end of the stream, so we can stop reading
  157. if err == io.ErrUnexpectedEOF {
  158. return
  159. }
  160. }
  161. }
  162. // closedError returns an error if the attempt to read on a closed reader was made.
  163. // If the reader had an error, it returns that error instead.
  164. func (ab *AsyncBuffer) closedError() error {
  165. // If the reader is closed, we return the error or nil
  166. if ab.closed.Load() {
  167. err := ab.Error()
  168. if err == nil {
  169. err = errors.New("asyncbuffer.AsyncBuffer.ReadAt: attempt to read on closed reader")
  170. }
  171. return err
  172. }
  173. return nil
  174. }
  175. // offsetAvailable checks if the data at the given offset is available for reading.
  176. // It may return io.EOF if the reader is finished reading and the offset is beyond the end of the stream.
  177. func (ab *AsyncBuffer) offsetAvailable(off int64) (bool, error) {
  178. // We can not read data from the closed reader, none
  179. if ab.closed.Load() {
  180. return false, ab.closedError()
  181. }
  182. // In case we are trying to read data beyond the pause threshold, we need to resume the reader
  183. if off >= pauseThreshold {
  184. ab.paused.Release()
  185. }
  186. // In case the offset falls within the already read chunks, we can return immediately,
  187. // even if error has occurred in the future
  188. if off < ab.len.Load() {
  189. return true, nil
  190. }
  191. // In case the reader is finished reading, and we have not read enough
  192. // data yet, return either error or EOF
  193. if ab.finished.Load() {
  194. // In case, error has occurred, we need to return it
  195. err := ab.Error()
  196. if err != nil {
  197. return false, err
  198. }
  199. // Otherwise, it's EOF if the offset is beyond the end of the stream
  200. return false, io.EOF
  201. }
  202. // No available data
  203. return false, nil
  204. }
  205. // WaitFor waits for the data to be ready at the given offset. nil means ok.
  206. // It guarantees that the chunk at the given offset is ready to be read.
  207. func (ab *AsyncBuffer) WaitFor(off int64) error {
  208. // In case we are trying to read data which would potentially hit the pause threshold,
  209. // we need to unpause the reader ASAP.
  210. if off >= pauseThreshold {
  211. fmt.Println(off, pauseThreshold, "UNLOCKING")
  212. ab.paused.Release()
  213. }
  214. for {
  215. ok, err := ab.offsetAvailable(off)
  216. if ok || err != nil {
  217. return err
  218. }
  219. <-ab.getNewChunkSignal()
  220. }
  221. }
  222. // Wait waits for the reader to finish reading all data and returns
  223. // the total length of the data read.
  224. func (ab *AsyncBuffer) Wait() (int64, error) {
  225. // Wait ends till the end of the stream: unpause the reader
  226. ab.paused.Release()
  227. for {
  228. // We can not read data from the closed reader even if there were no errors
  229. if ab.closed.Load() {
  230. return 0, ab.closedError()
  231. }
  232. // In case the reader is finished reading, we can return immediately
  233. if ab.finished.Load() {
  234. size := ab.len.Load()
  235. // If there was an error during reading, we need to return it no matter what position
  236. // had the error happened
  237. err := ab.err.Load()
  238. if err != nil {
  239. err, ok := err.(error)
  240. if !ok {
  241. return size, errors.New("asyncbuffer.AsyncBuffer.Wait: failed to get error")
  242. }
  243. return size, err
  244. }
  245. return size, nil
  246. }
  247. // Lock until the next chunk is ready
  248. <-ab.getNewChunkSignal()
  249. }
  250. }
  251. // Error returns the error that occurred during reading data in background.
  252. func (ab *AsyncBuffer) Error() error {
  253. err := ab.err.Load()
  254. if err == nil {
  255. return nil
  256. }
  257. errCast, ok := err.(error)
  258. if !ok {
  259. return errors.New("asyncbuffer.AsyncBuffer.Error: failed to get error")
  260. }
  261. return errCast
  262. }
  263. // readChunkAt copies data from the chunk at the given absolute offset to the provided slice.
  264. // Chunk must be available when this method is called.
  265. // Returns the number of bytes copied to the slice or 0 if chunk has no data
  266. // (eg. offset is beyond the end of the stream).
  267. func (ab *AsyncBuffer) readChunkAt(p []byte, off int64) int {
  268. // If the chunk is not available, we return 0
  269. if off >= ab.len.Load() {
  270. return 0
  271. }
  272. ind := off / chunkSize // chunk index
  273. chunk := ab.chunks[ind]
  274. startOffset := off % chunkSize // starting offset in the chunk
  275. // If the offset in current chunk is greater than the data
  276. // it has, we return 0
  277. if startOffset >= int64(len(chunk.data)) {
  278. return 0
  279. }
  280. // Copy data to the target slice. The number of bytes to copy is limited by the
  281. // size of the target slice and the size of the data in the chunk.
  282. return copy(p, chunk.data[startOffset:])
  283. }
  284. // readAt reads data from the AsyncBuffer at the given offset.
  285. //
  286. // Please note that if pause threshold is hit in the middle of the reading,
  287. // the data beyond the threshold may not be available.
  288. //
  289. // If the reader is paused and we try to read data beyond the pause threshold,
  290. // it will wait till something could be returned.
  291. func (ab *AsyncBuffer) readAt(p []byte, off int64) (int, error) {
  292. size := int64(len(p)) // total size of the data to read
  293. if off < 0 {
  294. return 0, errors.New("asyncbuffer.AsyncBuffer.readAt: negative offset")
  295. }
  296. // Wait for the offset to be available.
  297. // It may return io.EOF if the offset is beyond the end of the stream.
  298. err := ab.WaitFor(off)
  299. if err != nil {
  300. return 0, err
  301. }
  302. ab.mu.RLock()
  303. defer ab.mu.RUnlock()
  304. // If the reader is closed, we return an error
  305. if ab.closed.Load() {
  306. return 0, ab.closedError()
  307. }
  308. // Read data from the first chunk
  309. n := ab.readChunkAt(p, off)
  310. if n == 0 {
  311. return 0, io.EOF // Failed to read any data: means we tried to read beyond the end of the stream
  312. }
  313. size -= int64(n)
  314. off += int64(n) // Here and beyond off always points to the last read byte + 1
  315. // Now, let's try to read the rest of the data from next chunks while they are available
  316. for size > 0 {
  317. // If data is not available at the given offset, we can return data read so far.
  318. ok, err := ab.offsetAvailable(off)
  319. if !ok || err != nil {
  320. return n, err
  321. }
  322. // Read data from the next chunk
  323. nX := ab.readChunkAt(p[n:], off)
  324. n += nX
  325. size -= int64(nX)
  326. off += int64(nX)
  327. // If we read data shorter than ChunkSize or, in case that was the last chunk, less than
  328. // the size of the tail, return kind of EOF
  329. if int64(nX) < min(size, int64(chunkSize)) {
  330. return n, io.EOF
  331. }
  332. }
  333. return n, nil
  334. }
  335. // Close closes the AsyncBuffer and releases all resources.
  336. // It returns an error if the reader was already closed or if there was
  337. // an error during reading data in background even if none of the subsequent
  338. // readers have reached the position where the error occurred.
  339. func (ab *AsyncBuffer) Close() error {
  340. ab.mu.Lock()
  341. defer ab.mu.Unlock()
  342. // If the reader is already closed, we return immediately error or nil
  343. if ab.closed.Load() {
  344. return ab.Error()
  345. }
  346. ab.closed.Store(true)
  347. // If the reader is still running, we need to signal that it should stop and close the channel
  348. if !ab.finished.Load() {
  349. ab.finished.Store(true)
  350. close(ab.newChunkSignal)
  351. }
  352. // Return all chunks to the pool
  353. for _, chunk := range ab.chunks {
  354. chunkPool.Put(chunk)
  355. }
  356. ab.paused.Release()
  357. return nil
  358. }
  359. // Reader returns an io.ReadSeeker+io.ReaderAt that can be used to read actual data from the AsyncBuffer
  360. func (ab *AsyncBuffer) Reader() *Reader {
  361. return &Reader{ab: ab, pos: 0}
  362. }
  363. // Read reads data from the AsyncBuffer.
  364. func (r *Reader) Read(p []byte) (int, error) {
  365. n, err := r.ab.readAt(p, r.pos)
  366. if err == nil {
  367. r.pos += int64(n)
  368. }
  369. return n, err
  370. }
  371. // Seek sets the position of the reader to the given offset and returns the new position
  372. func (r *Reader) Seek(offset int64, whence int) (int64, error) {
  373. switch whence {
  374. case io.SeekStart:
  375. r.pos = offset
  376. case io.SeekCurrent:
  377. r.pos += offset
  378. case io.SeekEnd:
  379. size, err := r.ab.Wait()
  380. if err != nil {
  381. return 0, err
  382. }
  383. r.pos = size + offset
  384. default:
  385. return 0, errors.New("asyncbuffer.AsyncBuffer.ReadAt: invalid whence")
  386. }
  387. if r.pos < 0 {
  388. return 0, errors.New("asyncbuffer.AsyncBuffer.ReadAt: negative position")
  389. }
  390. return r.pos, nil
  391. }