| author | Cosmin Sabou <csabou@mozilla.com> |
| Thu, 12 Dec 2019 20:37:30 +0200 | |
| changeset 506729 | ee4d1ff71f58952162907aa41ef891f6eedea58b |
| parent 506728 | 47a7a6fcaa2d43d6d514d98f522fa63e57f70535 |
| child 506730 | 97c087d81e8b08bc61b5e09694c32397a036bd76 |
| push id | 36911 |
| push user | csabou@mozilla.com |
| push date | Fri, 13 Dec 2019 04:07:58 +0000 |
| treeherder | mozilla-central@61ec58edfd13 [default view] [failures only] |
| perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
| bugs | 1598380 |
| milestone | 73.0a1 |
| backs out | 3f9482614123972f6fcd8b6a1d99dc8db8cb6c07 |
| first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
| last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
--- a/gfx/wr/webrender/src/device/gl.rs +++ b/gfx/wr/webrender/src/device/gl.rs @@ -2692,77 +2692,39 @@ impl Device { } pub fn delete_pbo(&mut self, mut pbo: PBO) { self.gl.delete_buffers(&[pbo.id]); pbo.id = 0; pbo.reserved_size = 0 } - /// Returns the size in bytes required to upload an area of pixels of the specified - /// size, with the specified stride, to a texture of the specified format. - pub fn required_upload_size(size: DeviceIntSize, stride: Option<i32>, format: ImageFormat, optimal_pbo_stride: NonZeroUsize) -> usize { - assert!(size.width >= 0); - assert!(size.height >= 0); - if let Some(stride) = stride { - assert!(stride >= 0); - } - - let bytes_pp = format.bytes_per_pixel() as usize; - let width_bytes = size.width as usize * bytes_pp; - let src_stride = stride.map_or(width_bytes, |stride| { - assert!(stride >= 0); - stride as usize - }); - - let dst_stride = round_up_to_multiple(src_stride, optimal_pbo_stride); - - // The size of the chunk should only need to be (height - 1) * dst_stride + width_bytes, - // however, the android emulator will error unless it is height * dst_stride. - // See bug 1587047 for details. - // Using the full final row also ensures that the offset of the next chunk is - // optimally aligned. - dst_stride * size.height as usize - } - - /// Returns a `TextureUploader` which can be used to upload texture data to `texture`. - /// The total size in bytes is specified by `upload_size`, and must be greater than zero - /// and at least as large as the sum of the `required_upload_size()` for each subsequent - /// call to `TextureUploader.upload()`. pub fn upload_texture<'a, T>( &'a mut self, texture: &'a Texture, pbo: &PBO, - upload_size: usize, + upload_count: usize, ) -> TextureUploader<'a, T> { debug_assert!(self.inside_frame); - assert_ne!(upload_size, 0, "Must specify valid upload size"); - self.bind_texture(DEFAULT_TEXTURE, texture, Swizzle::default()); let buffer = match self.upload_method { UploadMethod::Immediate => None, UploadMethod::PixelBuffer(hint) => { + let upload_size = upload_count * mem::size_of::<T>(); self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, pbo.id); - self.gl.buffer_data_untyped( - gl::PIXEL_UNPACK_BUFFER, - upload_size as _, - ptr::null(), - hint.to_gl(), - ); - let ptr = self.gl.map_buffer_range( - gl::PIXEL_UNPACK_BUFFER, - 0, - upload_size as _, - gl::MAP_WRITE_BIT | gl::MAP_INVALIDATE_BUFFER_BIT, - ); - let mapping = unsafe { - slice::from_raw_parts_mut(ptr as *mut _, upload_size) - }; - Some(PixelBuffer::new(upload_size, mapping)) + if upload_size != 0 { + self.gl.buffer_data_untyped( + gl::PIXEL_UNPACK_BUFFER, + upload_size as _, + ptr::null(), + hint.to_gl(), + ); + } + Some(PixelBuffer::new(hint.to_gl(), upload_size)) }, }; TextureUploader { target: UploadTarget { gl: &*self.gl, bgra_format: self.bgra_formats.external, optimal_pbo_stride: self.optimal_pbo_stride, @@ -3518,55 +3480,54 @@ pub struct FormatDesc { struct UploadChunk { rect: DeviceIntRect, layer_index: i32, stride: Option<i32>, offset: usize, format_override: Option<ImageFormat>, } -struct PixelBuffer<'a> { +struct PixelBuffer { + usage: gl::GLenum, size_allocated: usize, size_used: usize, // small vector avoids heap allocation for a single chunk chunks: SmallVec<[UploadChunk; 1]>, - mapping: &'a mut [mem::MaybeUninit<u8>], } -impl<'a> PixelBuffer<'a> { +impl PixelBuffer { fn new( + usage: gl::GLenum, size_allocated: usize, - mapping: &'a mut [mem::MaybeUninit<u8>], ) -> Self { PixelBuffer { + usage, size_allocated, size_used: 0, chunks: SmallVec::new(), - mapping, } } } struct UploadTarget<'a> { gl: &'a dyn gl::Gl, bgra_format: gl::GLuint, optimal_pbo_stride: NonZeroUsize, texture: &'a Texture, } pub struct TextureUploader<'a, T> { target: UploadTarget<'a>, - buffer: Option<PixelBuffer<'a>>, + buffer: Option<PixelBuffer>, marker: PhantomData<T>, } impl<'a, T> Drop for TextureUploader<'a, T> { fn drop(&mut self) { if let Some(buffer) = self.buffer.take() { - self.target.gl.unmap_buffer(gl::PIXEL_UNPACK_BUFFER); for chunk in buffer.chunks { self.target.update_impl(chunk); } self.target.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0); } } } @@ -3598,52 +3559,80 @@ impl<'a, T> TextureUploader<'a, T> { let src_stride = stride.map_or(width_bytes, |stride| { assert!(stride >= 0); stride as usize }); let src_size = (rect.size.height as usize - 1) * src_stride + width_bytes; assert!(src_size <= len * mem::size_of::<T>()); - // for optimal PBO texture uploads the offset and stride of the data in + // for optimal PBO texture uploads the stride of the data in // the buffer may have to be a multiple of a certain value. let dst_stride = round_up_to_multiple(src_stride, self.target.optimal_pbo_stride); - let dst_size = Device::required_upload_size( - rect.size, - stride, - self.target.texture.format, - self.target.optimal_pbo_stride - ); + // The size of the PBO should only need to be (height - 1) * dst_stride + width_bytes, + // however, the android emulator will error unless it is height * dst_stride. + // See bug 1587047 for details. + let dst_size = rect.size.height as usize * dst_stride; match self.buffer { Some(ref mut buffer) => { - assert!(buffer.size_used + dst_size <= buffer.size_allocated, "UploadBuffer is too small"); - - unsafe { - let src: &[mem::MaybeUninit<u8>] = slice::from_raw_parts(data as *const _, src_size); - - if src_stride == dst_stride { - // the stride is already optimal, so simply copy - // the data as-is in to the buffer - let dst_start = buffer.size_used; - let dst_end = dst_start + src_size; - - buffer.mapping[dst_start..dst_end].copy_from_slice(src); - } else { - // copy the data line-by-line in to the buffer so - // that it has an optimal stride + if buffer.size_used + dst_size > buffer.size_allocated { + // flush + for chunk in buffer.chunks.drain() { + self.target.update_impl(chunk); + } + buffer.size_used = 0; + } + + if dst_size > buffer.size_allocated { + // allocate a buffer large enough + self.target.gl.buffer_data_untyped( + gl::PIXEL_UNPACK_BUFFER, + dst_size as _, + ptr::null(), + buffer.usage, + ); + buffer.size_allocated = dst_size; + } + + if src_stride == dst_stride { + // the stride is already optimal, so simply copy + // the data as-is in to the buffer + assert_eq!(src_size % mem::size_of::<T>(), 0); + self.target.gl.buffer_sub_data_untyped( + gl::PIXEL_UNPACK_BUFFER, + buffer.size_used as isize, + src_size as isize, + data as *const _, + ); + } else { + // copy the data line-by-line in to the buffer so + // that it has an optimal stride + let ptr = self.target.gl.map_buffer_range( + gl::PIXEL_UNPACK_BUFFER, + buffer.size_used as _, + dst_size as _, + gl::MAP_WRITE_BIT | gl::MAP_INVALIDATE_RANGE_BIT, + ); + + unsafe { + let src: &[mem::MaybeUninit<u8>] = slice::from_raw_parts(data as *const _, src_size); + let dst: &mut [mem::MaybeUninit<u8>] = slice::from_raw_parts_mut(ptr as *mut _, dst_size); + for y in 0..rect.size.height as usize { let src_start = y * src_stride; let src_end = src_start + width_bytes; - let dst_start = buffer.size_used + y * dst_stride; + let dst_start = y * dst_stride; let dst_end = dst_start + width_bytes; - buffer.mapping[dst_start..dst_end].copy_from_slice(&src[src_start..src_end]) + dst[dst_start..dst_end].copy_from_slice(&src[src_start..src_end]) } } + + self.target.gl.unmap_buffer(gl::PIXEL_UNPACK_BUFFER); } buffer.chunks.push(UploadChunk { rect, layer_index, stride: Some(dst_stride as i32), offset: buffer.size_used, format_override,
--- a/gfx/wr/webrender/src/internal_types.rs +++ b/gfx/wr/webrender/src/internal_types.rs @@ -358,26 +358,26 @@ pub struct TextureCacheUpdate { #[derive(Default)] pub struct TextureUpdateList { /// Indicates that there was some kind of cleanup clear operation. Used for /// sanity checks. pub clears_shared_cache: bool, /// Commands to alloc/realloc/free the textures. Processed first. pub allocations: Vec<TextureCacheAllocation>, /// Commands to update the contents of the textures. Processed second. - pub updates: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>, + pub updates: Vec<TextureCacheUpdate>, } impl TextureUpdateList { /// Mints a new `TextureUpdateList`. pub fn new() -> Self { TextureUpdateList { clears_shared_cache: false, allocations: Vec::new(), - updates: FastHashMap::default(), + updates: Vec::new(), } } /// Returns true if this is a no-op (no updates to be applied). pub fn is_nop(&self) -> bool { self.allocations.is_empty() && self.updates.is_empty() } @@ -385,20 +385,17 @@ impl TextureUpdateList { #[inline] pub fn note_clear(&mut self) { self.clears_shared_cache = true; } /// Pushes an update operation onto the list. #[inline] pub fn push_update(&mut self, update: TextureCacheUpdate) { - self.updates - .entry(update.id) - .or_default() - .push(update); + self.updates.push(update); } /// Sends a command to the Renderer to clear the portion of the shared region /// we just freed. Used when the texture cache debugger is enabled. #[cold] pub fn push_debug_clear( &mut self, id: CacheTextureId, @@ -478,17 +475,17 @@ impl TextureUpdateList { } /// Pushes a free operation onto the list, potentially coalescing with /// previous operations. pub fn push_free(&mut self, id: CacheTextureId) { self.debug_assert_coalesced(id); // Drop any unapplied updates to the to-be-freed texture. - self.updates.remove(&id); + self.updates.retain(|x| x.id != id); // Drop any allocations for it as well. If we happen to be allocating and // freeing in the same batch, we can collapse them to a no-op. let idx = self.allocations.iter().position(|x| x.id == id); let removed_kind = idx.map(|i| self.allocations.remove(i).kind); match removed_kind { Some(TextureCacheAllocationKind::Alloc(..)) => { /* no-op! */ }, Some(TextureCacheAllocationKind::Free) => panic!("Double free"),
--- a/gfx/wr/webrender/src/renderer.rs +++ b/gfx/wr/webrender/src/renderer.rs @@ -1514,27 +1514,20 @@ impl GpuCacheTexture { let rows_dirty = rows .iter() .filter(|row| row.is_dirty) .count(); if rows_dirty == 0 { return 0 } - let upload_size = rows_dirty * Device::required_upload_size( - DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as i32, 1), - None, - texture.get_format(), - device.optimal_pbo_stride(), - ); - let mut uploader = device.upload_texture( texture, buffer, - upload_size, + rows_dirty * MAX_VERTEX_TEXTURE_WIDTH, ); for (row_index, row) in rows.iter_mut().enumerate() { if !row.is_dirty { continue; } let rect = DeviceIntRect::new( @@ -1665,24 +1658,18 @@ impl<T> VertexDataTexture<T> { (MAX_VERTEX_TEXTURE_WIDTH - (MAX_VERTEX_TEXTURE_WIDTH % texels_per_item)) as i32; let rect = DeviceIntRect::new( DeviceIntPoint::zero(), DeviceIntSize::new(logical_width, needed_height), ); debug_assert!(len <= data.capacity(), "CPU copy will read out of bounds"); - let upload_size = Device::required_upload_size( - rect.size, - None, - self.texture().get_format(), - device.optimal_pbo_stride(), - ); device - .upload_texture(self.texture(), &self.pbo, upload_size) + .upload_texture(self.texture(), &self.pbo, 0) .upload(rect, 0, None, None, data.as_ptr(), len); } fn deinit(mut self, device: &mut Device) { device.delete_pbo(self.pbo); if let Some(t) = self.texture.take() { device.delete_texture(t); } @@ -3462,118 +3449,93 @@ impl Renderer { } } if let Some(old) = old { self.device.delete_texture(old); } } - for (texture_id, updates) in update_list.updates { - let texture = &self.texture_resolver.texture_cache_map[&texture_id]; - let device = &mut self.device; - - // Calculate the total size of buffer required to upload all updates. - let required_size = updates.iter().map(|update| { - // Perform any debug clears now. As this requires a mutable borrow of device, - // it must be done before all the updates which require a TextureUploader. - if let TextureUpdateSource::DebugClear = update.source { - let draw_target = DrawTarget::from_texture( + for update in update_list.updates { + let TextureCacheUpdate { id, rect, stride, offset, layer_index, format_override, source } = update; + let texture = &self.texture_resolver.texture_cache_map[&id]; + + let bytes_uploaded = match source { + TextureUpdateSource::Bytes { data } => { + let mut uploader = self.device.upload_texture( texture, - update.layer_index as usize, - false, + &self.texture_cache_upload_pbo, + 0, ); - device.bind_draw_target(draw_target); - device.clear_target( - Some(TEXTURE_CACHE_DBG_CLEAR_COLOR), - None, - Some(draw_target.to_framebuffer_rect(update.rect.to_i32())) - ); - - 0 - } else { - Device::required_upload_size( - update.rect.size, - update.stride, - texture.get_format(), - device.optimal_pbo_stride(), + let data = &data[offset as usize ..]; + uploader.upload( + rect, + layer_index, + stride, + format_override, + data.as_ptr(), + data.len(), ) } - }).sum(); - - if required_size == 0 { - break; - } - - // For best performance we use a single TextureUploader for all uploads. - // Using individual TextureUploaders was causing performance issues on some drivers - // due to allocating too many PBOs. - let mut uploader = device.upload_texture( - texture, - &self.texture_cache_upload_pbo, - required_size - ); - - for update in updates { - let TextureCacheUpdate { id: _id, rect, stride, offset, layer_index, format_override, source } = update; - - let bytes_uploaded = match source { - TextureUpdateSource::Bytes { data } => { - let data = &data[offset as usize ..]; - uploader.upload( - rect, - layer_index, - stride, - format_override, - data.as_ptr(), - data.len(), - ) - } - TextureUpdateSource::External { id, channel_index } => { - let handler = self.external_image_handler - .as_mut() - .expect("Found external image, but no handler set!"); - // The filter is only relevant for NativeTexture external images. - let dummy_data; - let data = match handler.lock(id, channel_index, ImageRendering::Auto).source { - ExternalImageSource::RawData(data) => { - &data[offset as usize ..] - } - ExternalImageSource::Invalid => { - // Create a local buffer to fill the pbo. - let bpp = texture.get_format().bytes_per_pixel(); - let width = stride.unwrap_or(rect.size.width * bpp); - let total_size = width * rect.size.height; - // WR haven't support RGBAF32 format in texture_cache, so - // we use u8 type here. - dummy_data = vec![0xFFu8; total_size as usize]; - &dummy_data - } - ExternalImageSource::NativeTexture(eid) => { - panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id); - } - }; - let size = uploader.upload( - rect, - layer_index, - stride, - format_override, - data.as_ptr(), - data.len() - ); - handler.unlock(id, channel_index); - size - } - TextureUpdateSource::DebugClear => { - // DebugClear updates are handled separately. - 0 - } - }; - self.profile_counters.texture_data_uploaded.add(bytes_uploaded >> 10); - } + TextureUpdateSource::External { id, channel_index } => { + let mut uploader = self.device.upload_texture( + texture, + &self.texture_cache_upload_pbo, + 0, + ); + let handler = self.external_image_handler + .as_mut() + .expect("Found external image, but no handler set!"); + // The filter is only relevant for NativeTexture external images. + let dummy_data; + let data = match handler.lock(id, channel_index, ImageRendering::Auto).source { + ExternalImageSource::RawData(data) => { + &data[offset as usize ..] + } + ExternalImageSource::Invalid => { + // Create a local buffer to fill the pbo. + let bpp = texture.get_format().bytes_per_pixel(); + let width = stride.unwrap_or(rect.size.width * bpp); + let total_size = width * rect.size.height; + // WR haven't support RGBAF32 format in texture_cache, so + // we use u8 type here. + dummy_data = vec![0xFFu8; total_size as usize]; + &dummy_data + } + ExternalImageSource::NativeTexture(eid) => { + panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id); + } + }; + let size = uploader.upload( + rect, + layer_index, + stride, + format_override, + data.as_ptr(), + data.len() + ); + handler.unlock(id, channel_index); + size + } + TextureUpdateSource::DebugClear => { + let draw_target = DrawTarget::from_texture( + texture, + layer_index as usize, + false, + ); + self.device.bind_draw_target(draw_target); + self.device.clear_target( + Some(TEXTURE_CACHE_DBG_CLEAR_COLOR), + None, + Some(draw_target.to_framebuffer_rect(rect.to_i32())) + ); + 0 + } + }; + self.profile_counters.texture_data_uploaded.add(bytes_uploaded >> 10); } if update_list.clears_shared_cache { self.shared_texture_cache_cleared = true; } } drain_filter(