// Package asyncbuffer provides an asynchronous buffer that reads data from an
// io.Reader in the background.
//
// When created, AsyncBuffer starts reading from the upstream reader in the
// background. If a read error occurs, it is stored and can be checked with
// AsyncBuffer.Error().
//
// When reading through AsyncBuffer.Reader().Read(), the error is only returned
// once the reader reaches the point where the error occurred. In other words,
// errors are delayed until encountered by the reader.
//
// However, AsyncBuffer.Close() and AsyncBuffer.Error() will immediately return
// any stored error, even if the reader has not yet reached the error point.
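//
// A minimal usage sketch (error handling is abbreviated, and src stands for any
// io.Reader you already have):
//
//	ab := asyncbuffer.FromReader(src)
//	defer ab.Close()
//
//	data, err := io.ReadAll(ab.Reader())
//	if err != nil {
//		// handle the delayed upstream read error
//	}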
package asyncbuffer

import (
	"errors"
	"io"
	"sync"
	"sync/atomic"
)

// ChunkSize is the size of each chunk in bytes
const ChunkSize = 4096

// byteChunk holds a buffer and the data read into it from the upstream reader.
// The data slice is needed because a read may return fewer than ChunkSize bytes.
type byteChunk struct {
	buf  []byte
	data []byte
}

// chunkPool is a global sync.Pool that holds byteChunk objects for
// all readers
var chunkPool = sync.Pool{
	New: func() any {
		buf := make([]byte, ChunkSize)
		return &byteChunk{
			buf:  buf,
			data: buf[:0],
		}
	},
}

// AsyncBuffer is a wrapper around io.Reader that reads data in chunks
// in the background and allows reading from it synchronously.
type AsyncBuffer struct {
	r              io.Reader     // Upstream reader
	chunks         []*byteChunk  // References to the chunks read from the upstream reader
	err            atomic.Value  // Error that occurred during reading
	finished       atomic.Bool   // Indicates that the reader has finished reading
	len            atomic.Int64  // Total length of the data read
	closed         atomic.Bool   // Indicates that the reader was closed
	mu             sync.RWMutex  // Mutex on chunks slice
	newChunkSignal chan struct{} // Tick-tock channel that indicates that a new chunk is ready
}

// Reader provides an io.ReadSeeker interface for reading the actual data from
// the AsyncBuffer. Each Reader keeps its own position, so several Readers can
// read from the same AsyncBuffer independently.
type Reader struct {
	ab  *AsyncBuffer
	pos int64
}

// FromReader creates a new AsyncBuffer that reads from the given io.Reader in the background
func FromReader(r io.Reader) *AsyncBuffer {
	ab := &AsyncBuffer{
		r:              r,
		newChunkSignal: make(chan struct{}),
	}

	go ab.readChunks()

	return ab
}
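
// A hedged example of wrapping an already opened file (the file name and the
// surrounding error handling are illustrative only):
//
//	f, err := os.Open("input.bin")
//	if err != nil {
//		return err
//	}
//	defer f.Close()
//
//	ab := FromReader(f)
//	defer ab.Close()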

// getNewChunkSignal returns the channel that signals when a new chunk is ready.
// The lock is required so that the channel is not replaced while it is being read.
func (ab *AsyncBuffer) getNewChunkSignal() chan struct{} {
	ab.mu.RLock()
	defer ab.mu.RUnlock()

	return ab.newChunkSignal
}

// addChunk adds a new chunk to the AsyncBuffer, increments len and signals that a chunk is ready
func (ab *AsyncBuffer) addChunk(chunk *byteChunk) {
	ab.mu.Lock()
	defer ab.mu.Unlock()

	// Store the chunk and increase the total length of the data read
	ab.chunks = append(ab.chunks, chunk)
	ab.len.Add(int64(len(chunk.data)))

	// Signal that a chunk is ready: close the current signal channel and
	// replace it with a fresh one for the next chunk
	currSignal := ab.newChunkSignal
	ab.newChunkSignal = make(chan struct{})
	close(currSignal)
}
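
// The signal channel acts as a broadcast: waiters grab the current channel via
// getNewChunkSignal and block on it, while addChunk (or finish) closes it to
// wake them all up. A waiting loop therefore looks roughly like this sketch,
// where dataReady is a hypothetical stand-in for the caller's condition:
//
//	for !dataReady() {
//		<-ab.getNewChunkSignal()
//	}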

// finish marks the reader as finished
func (ab *AsyncBuffer) finish() {
	// Indicate that the reader has finished reading
	ab.finished.Store(true)

	// If Close() was called before all the chunks were read, the channel has
	// already been closed, so it must not be closed again.
	if !ab.closed.Load() {
		close(ab.newChunkSignal)
	}
}

// readChunks reads data from the upstream reader in the background and stores
// it in chunks
func (ab *AsyncBuffer) readChunks() {
	defer ab.finish()

	// Stop reading if the reader is finished
	for !ab.finished.Load() {
		// Get a chunk from the pool.
		// If the pool is empty, it creates a new byteChunk with ChunkSize
		chunk, ok := chunkPool.Get().(*byteChunk)
		if !ok {
			ab.err.Store(errors.New("asyncbuffer.AsyncBuffer.readChunks: failed to get chunk from pool"))
			return
		}

		// Read data into the chunk's buffer
		n, err := io.ReadFull(ab.r, chunk.buf)

		// If it is not an EOF-related error, store it and stop reading
		if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
			ab.err.Store(err)
			return
		}

		// No bytes were read (n == 0), so we can return the chunk to the pool
		if err == io.EOF || n == 0 {
			chunkPool.Put(chunk)
			return
		}

		// Resize the chunk's data slice to the number of bytes read
		chunk.data = chunk.buf[:n]

		// Store the reference to the chunk in the AsyncBuffer
		ab.addChunk(chunk)

		// io.ErrUnexpectedEOF means that some bytes were read but the end of the
		// stream was reached, so we can stop reading
		if err == io.ErrUnexpectedEOF {
			return
		}
	}
}

// closedError returns an error if an attempt to read on a closed reader was made.
// If the reader had an error, it returns that error instead.
func (ab *AsyncBuffer) closedError() error {
	// If the reader is closed, return the stored error, or a generic error if none was stored
	if ab.closed.Load() {
		err := ab.Error()
		if err == nil {
			err = errors.New("asyncbuffer.AsyncBuffer.ReadAt: attempt to read on closed reader")
		}

		return err
	}

	return nil
}

// offsetAvailable checks if the data at the given offset is available for reading.
// It may return io.EOF if the reader has finished reading and the offset is beyond the end of the stream.
func (ab *AsyncBuffer) offsetAvailable(off int64) (bool, error) {
	// We cannot read any data from a closed reader
	if ab.closed.Load() {
		return false, ab.closedError()
	}

	// If the offset falls within the already read chunks, we can return immediately,
	// even if an error occurred later in the stream
	if off < ab.len.Load() {
		return true, nil
	}

	// If the reader has finished reading and we still have not read enough
	// data, return either the error or io.EOF
	if ab.finished.Load() {
		// If an error occurred, we need to return it
		err := ab.Error()
		if err != nil {
			return false, err
		}

		// Otherwise, it's EOF since the offset is beyond the end of the stream
		return false, io.EOF
	}

	// No data available yet
	return false, nil
}

// WaitFor waits for the data at the given offset to be ready; nil means the data is available.
// It guarantees that the chunk at the given offset is ready to be read.
func (ab *AsyncBuffer) WaitFor(off int64) error {
	for {
		ok, err := ab.offsetAvailable(off)
		if ok || err != nil {
			return err
		}

		<-ab.getNewChunkSignal()
	}
}

// Wait waits for the reader to finish reading all data and returns
// the total length of the data read.
func (ab *AsyncBuffer) Wait() (int64, error) {
	for {
		// We cannot read data from a closed reader even if there were no errors
		if ab.closed.Load() {
			return 0, ab.closedError()
		}

		// If the reader has finished reading, we can return immediately
		if ab.finished.Load() {
			size := ab.len.Load()

			// If there was an error during reading, return it no matter at what
			// position it happened
			err := ab.err.Load()
			if err != nil {
				err, ok := err.(error)
				if !ok {
					return size, errors.New("asyncbuffer.AsyncBuffer.Wait: failed to get error")
				}

				return size, err
			}

			return size, nil
		}

		// Block until the next chunk is ready
		<-ab.getNewChunkSignal()
	}
}
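
// A hedged sketch of using Wait to make sure the whole upstream has been
// buffered before seeking around (src is a stand-in for any io.Reader):
//
//	ab := FromReader(src)
//	defer ab.Close()
//
//	size, err := ab.Wait()
//	if err != nil {
//		return err
//	}
//	// size is the total number of bytes buffered; reads below it will not block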

// Error returns the error that occurred during reading data in the background.
func (ab *AsyncBuffer) Error() error {
	err := ab.err.Load()
	if err == nil {
		return nil
	}

	errCast, ok := err.(error)
	if !ok {
		return errors.New("asyncbuffer.AsyncBuffer.Error: failed to get error")
	}

	return errCast
}

// readChunkAt copies data from the chunk at the given absolute offset to the provided slice.
// The chunk must be available when this method is called.
// It returns the number of bytes copied to the slice, or 0 if the chunk has no data
// (e.g. the offset is beyond the end of the stream).
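//
// For example, with ChunkSize = 4096, an absolute offset of 10000 falls into
// chunk index 2 (10000 / 4096) at offset 1808 within that chunk (10000 % 4096).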
func (ab *AsyncBuffer) readChunkAt(p []byte, off, rem int64) int {
	// If the chunk is not available, we return 0
	if off >= ab.len.Load() {
		return 0
	}

	ind := off / ChunkSize // chunk index
	chunk := ab.chunks[ind]

	startOffset := off % ChunkSize // starting offset in the chunk

	// If the offset in the current chunk is beyond the data it holds, we return 0
	if startOffset >= int64(len(chunk.data)) {
		return 0
	}

	// How many bytes we can read from the chunk. No more than:
	// - what is left to read in total
	// - the chunk size minus the start offset
	// - what the chunk holds
	size := min(rem, ChunkSize-startOffset, int64(len(chunk.data)))
	if size == 0 {
		return 0
	}

	return copy(p, chunk.data[startOffset:startOffset+size])
}

// readAt reads data from the AsyncBuffer at the given offset.
//
// It blocks until the data at the given offset becomes available, which may
// result in io.EOF if the offset is beyond the end of the stream. After that it
// copies as much already buffered data as fits into p without waiting for
// further chunks, so it may return fewer bytes than requested; io.EOF is
// returned when the end of the stream is reached.
func (ab *AsyncBuffer) readAt(p []byte, off int64) (int, error) {
	size := int64(len(p)) // total size of the data to read

	if off < 0 {
		return 0, errors.New("asyncbuffer.AsyncBuffer.readAt: negative offset")
	}

	// Wait for the offset to become available.
	// It may return io.EOF if the offset is beyond the end of the stream.
	err := ab.WaitFor(off)
	if err != nil {
		return 0, err
	}

	ab.mu.RLock()
	defer ab.mu.RUnlock()

	// If the reader is closed, we return an error
	if ab.closed.Load() {
		return 0, ab.closedError()
	}

	// Read data from the first chunk
	n := ab.readChunkAt(p, off, size)
	if n == 0 {
		return 0, io.EOF // Failed to read any data: we tried to read beyond the end of the stream
	}

	size -= int64(n)
	off += int64(n) // From here on, off always points to one past the last byte read

	// Now try to read the rest of the data from the next chunks while they are available
	for size > 0 {
		// If data is not available at the given offset, we can return the data read so far
		ok, err := ab.offsetAvailable(off)
		if !ok || err != nil {
			return n, err
		}

		// Read data from the next chunk
		nX := ab.readChunkAt(p[n:], off, size)
		n += nX
		size -= int64(nX)
		off += int64(nX)

		// If we read less than a full chunk (or, for the last chunk, less than
		// the remaining tail), report EOF
		if int64(nX) < min(size, int64(ChunkSize)) {
			return n, io.EOF
		}
	}

	return n, nil
}

// Close closes the AsyncBuffer and releases all resources.
// It returns an error if the reader was already closed or if there was
// an error during reading data in the background, even if none of the
// readers has reached the position where the error occurred.
func (ab *AsyncBuffer) Close() error {
	ab.mu.Lock()
	defer ab.mu.Unlock()

	// If the reader is already closed, we immediately return the stored error or nil
	if ab.closed.Load() {
		return ab.Error()
	}

	ab.closed.Store(true)

	// If the reader is still running, signal that it should stop and close the channel
	if !ab.finished.Load() {
		ab.finished.Store(true)
		close(ab.newChunkSignal)
	}

	// Return all chunks to the pool
	for _, chunk := range ab.chunks {
		chunkPool.Put(chunk)
	}

	return nil
}

// Reader returns an io.ReadSeeker that can be used to read the actual data from the AsyncBuffer
func (ab *AsyncBuffer) Reader() *Reader {
	return &Reader{ab: ab, pos: 0}
}

// Read reads data from the AsyncBuffer.
func (r *Reader) Read(p []byte) (int, error) {
	n, err := r.ab.readAt(p, r.pos)
	if err != nil {
		return n, err
	}

	r.pos += int64(n)

	return n, nil
}

// Seek sets the position of the reader to the given offset and returns the new position
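//
// Note that seeking relative to io.SeekEnd blocks until the upstream read has
// finished, because the total size is not known before that. A hedged sketch of
// getting the total size and then returning to the start:
//
//	size, err := r.Seek(0, io.SeekEnd)
//	if err != nil {
//		return err
//	}
//	_, err = r.Seek(0, io.SeekStart)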
func (r *Reader) Seek(offset int64, whence int) (int64, error) {
	switch whence {
	case io.SeekStart:
		r.pos = offset
	case io.SeekCurrent:
		r.pos += offset
	case io.SeekEnd:
		size, err := r.ab.Wait()
		if err != nil {
			return 0, err
		}

		r.pos = size + offset
	default:
		return 0, errors.New("asyncbuffer.Reader.Seek: invalid whence")
	}

	if r.pos < 0 {
		return 0, errors.New("asyncbuffer.Reader.Seek: negative position")
	}

	return r.pos, nil
}