diff --git a/Cargo.lock b/Cargo.lock index 54626b3..d8abaab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -781,6 +781,20 @@ dependencies = [ "nonmax", ] +[[package]] +name = "bevy_cuda" +version = "0.1.0" +source = "git+https://github.com/tychedelia/bevy_cuda#f1e63bb2811eddfb6ed13ff9920f7ae2be091da1" +dependencies = [ + "ash", + "bevy", + "cudarc", + "thiserror 2.0.18", + "wgpu", + "wgpu-hal", + "windows 0.58.0", +] + [[package]] name = "bevy_derive" version = "0.19.0-dev" @@ -2676,6 +2690,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "cudarc" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3aa12038120eb13347a6ae2ffab1d34efe78150125108627fd85044dd4d6ff1e" +dependencies = [ + "libloading", +] + [[package]] name = "cursor-icon" version = "1.2.0" @@ -5606,6 +5629,7 @@ dependencies = [ "bevy", "js-sys", "processing_core", + "processing_cuda", "processing_glfw", "processing_input", "processing_midi", @@ -5628,6 +5652,16 @@ dependencies = [ "tracing", ] +[[package]] +name = "processing_cuda" +version = "0.1.0" +dependencies = [ + "bevy", + "bevy_cuda", + "processing_core", + "processing_render", +] + [[package]] name = "processing_ffi" version = "0.0.1" @@ -5672,6 +5706,7 @@ dependencies = [ "bevy", "png", "processing", + "processing_cuda", "processing_glfw", "processing_webcam", "pyo3", @@ -5753,6 +5788,7 @@ name = "pyo3" version = "0.28.3" source = "git+https://github.com/PyO3/pyo3?branch=main#df36c7165663ec70180d71495bd52031edd075d4" dependencies = [ + "inventory", "libc", "once_cell", "portable-atomic", @@ -7331,6 +7367,7 @@ dependencies = [ "js-sys", "log", "naga", + "parking_lot", "portable-atomic", "profiling", "raw-window-handle", @@ -7370,6 +7407,7 @@ dependencies = [ "smallvec", "thiserror 2.0.18", "wgpu-core-deps-apple", + "wgpu-core-deps-emscripten", "wgpu-core-deps-wasm", "wgpu-core-deps-windows-linux-android", "wgpu-hal", @@ -7386,6 +7424,15 @@ dependencies = [ "wgpu-hal", ] +[[package]] +name = "wgpu-core-deps-emscripten" +version = "29.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef043bf135cc68b6f667c55ff4e345ce2b5924d75bad36a47921b0287ca4b24a" +dependencies = [ + "wgpu-hal", +] + [[package]] name = "wgpu-core-deps-wasm" version = "29.0.0" diff --git a/Cargo.toml b/Cargo.toml index bdde818..5ad01ba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ default = ["wayland"] wayland = ["processing_render/wayland"] x11 = ["processing_render/x11"] webcam = ["dep:processing_webcam"] +cuda = ["dep:processing_cuda"] [workspace] resolver = "3" @@ -24,12 +25,14 @@ too_many_arguments = "allow" [workspace.dependencies] bevy = { git = "https://github.com/bevyengine/bevy", branch = "main", features = ["file_watcher", "shader_format_wesl", "free_camera", "pan_camera"] } bevy_naga_reflect = { git = "https://github.com/tychedelia/bevy_naga_reflect" } +bevy_cuda = { git = "https://github.com/tychedelia/bevy_cuda" } naga = { version = "29", features = ["wgsl-in"] } wesl = { version = "0.3", default-features = false } pyo3 = { git = "https://github.com/PyO3/pyo3", branch = "main" } pyo3-introspection = { git = "https://github.com/PyO3/pyo3", branch = "main" } processing = { path = "." } processing_core = { path = "crates/processing_core" } +processing_cuda = { path = "crates/processing_cuda" } processing_pyo3 = { path = "crates/processing_pyo3" } processing_render = { path = "crates/processing_render" } processing_midi = { path = "crates/processing_midi" } @@ -44,6 +47,7 @@ processing_render = { workspace = true } processing_midi = { workspace = true } processing_input = { workspace = true } processing_webcam = { workspace = true, optional = true } +processing_cuda = { workspace = true, optional = true } tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/crates/processing_core/src/error.rs b/crates/processing_core/src/error.rs index dfd00ba..7cc1e9e 100644 --- a/crates/processing_core/src/error.rs +++ b/crates/processing_core/src/error.rs @@ -44,4 +44,6 @@ pub enum ProcessingError { ShaderNotFound, #[error("MIDI port {0} not found")] MidiPortNotFound(usize), + #[error("CUDA error: {0}")] + CudaError(String), } diff --git a/crates/processing_cuda/Cargo.toml b/crates/processing_cuda/Cargo.toml new file mode 100644 index 0000000..01310f2 --- /dev/null +++ b/crates/processing_cuda/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "processing_cuda" +version = "0.1.0" +edition = "2024" + +[lints] +workspace = true + +[features] +default = ["cuda-11040"] +cuda-11040 = ["bevy_cuda/cuda-11040"] + +[dependencies] +bevy = { workspace = true } +bevy_cuda = { workspace = true } +processing_core = { workspace = true } +processing_render = { workspace = true } diff --git a/crates/processing_cuda/src/lib.rs b/crates/processing_cuda/src/lib.rs new file mode 100644 index 0000000..e83a375 --- /dev/null +++ b/crates/processing_cuda/src/lib.rs @@ -0,0 +1,213 @@ +use bevy::prelude::*; +use bevy::render::RenderApp; +use bevy::render::render_resource::{Texture, TextureFormat}; +use bevy::render::renderer::RenderDevice; +use bevy_cuda::{CudaBuffer, CudaContext}; +use processing_core::app_mut; +use processing_core::error::{ProcessingError, Result}; +use processing_render::graphics::view_target; +use processing_render::image::{Image, gpu_image, pixel_size}; + +#[derive(Component)] +pub struct CudaImageBuffer { + pub buffer: CudaBuffer, + pub width: u32, + pub height: u32, + pub texture_format: TextureFormat, +} + +pub struct CudaPlugin; + +impl Plugin for CudaPlugin { + fn build(&self, _app: &mut App) {} + + fn finish(&self, app: &mut App) { + let render_app = app.sub_app(RenderApp); + let render_device = render_app.world().resource::(); + let wgpu_device = render_device.wgpu_device(); + match CudaContext::new(wgpu_device, 0) { + Ok(ctx) => { + app.insert_resource(ctx); + } + Err(e) => { + warn!("CUDA not available, GPU interop disabled: {e}"); + } + } + } +} + +fn cuda_ctx(world: &World) -> Result<&CudaContext> { + world + .get_resource::() + .ok_or(ProcessingError::CudaError("CUDA not available".into())) +} + +fn resolve_texture(app: &mut App, entity: Entity) -> Result<(Texture, TextureFormat, u32, u32)> { + if app.world().get::(entity).is_some() { + let texture = gpu_image(app, entity)?.texture.clone(); + let p_image = app.world().get::(entity).unwrap(); + return Ok(( + texture, + p_image.texture_format, + p_image.size.width, + p_image.size.height, + )); + } + if let Ok(vt) = view_target(app, entity) { + let texture = vt.main_texture().clone(); + let fmt = vt.main_texture_format(); + let size = texture.size(); + return Ok((texture, fmt, size.width, size.height)); + } + Err(ProcessingError::ImageNotFound) +} + +pub fn cuda_export(entity: Entity) -> Result<()> { + app_mut(|app| { + let (texture, texture_format, width, height) = resolve_texture(app, entity)?; + + let px_size = pixel_size(texture_format)?; + let buffer_size = (width as u64) * (height as u64) * (px_size as u64); + + let existing = app.world().get::(entity); + let needs_alloc = existing.is_none_or(|buf| buf.buffer.size() != buffer_size); + + if needs_alloc { + let cuda_ctx = cuda_ctx(app.world())?; + let buffer = cuda_ctx + .create_buffer(buffer_size) + .map_err(|e| ProcessingError::CudaError(format!("Buffer creation failed: {e}")))?; + app.world_mut().entity_mut(entity).insert(CudaImageBuffer { + buffer, + width, + height, + texture_format, + }); + } + + let world = app.world(); + let cuda_buf = world.get::(entity).unwrap(); + let cuda_ctx = cuda_ctx(world)?; + + cuda_ctx + .copy_texture_to_buffer(&texture, &cuda_buf.buffer, width, height, texture_format) + .map_err(|e| { + ProcessingError::CudaError(format!("Texture-to-buffer copy failed: {e}")) + })?; + + Ok(()) + }) +} + +pub fn cuda_import(entity: Entity, src_device_ptr: u64, byte_size: u64) -> Result<()> { + app_mut(|app| { + let (texture, texture_format, width, height) = resolve_texture(app, entity)?; + + let existing = app.world().get::(entity); + let needs_alloc = existing.is_none_or(|buf| buf.buffer.size() != byte_size); + + if needs_alloc { + let cuda_ctx = cuda_ctx(app.world())?; + let buffer = cuda_ctx + .create_buffer(byte_size) + .map_err(|e| ProcessingError::CudaError(format!("Buffer creation failed: {e}")))?; + app.world_mut().entity_mut(entity).insert(CudaImageBuffer { + buffer, + width, + height, + texture_format, + }); + } + + let world = app.world(); + let cuda_buf = world.get::(entity).unwrap(); + let cuda_ctx = cuda_ctx(world)?; + + // wait for work (i.e. python) to be done with the buffer before we read from it + cuda_ctx + .synchronize() + .map_err(|e| ProcessingError::CudaError(format!("synchronize failed: {e}")))?; + + cuda_buf + .buffer + .copy_from_device_ptr(src_device_ptr, byte_size) + .map_err(|e| ProcessingError::CudaError(format!("memcpy_dtod failed: {e}")))?; + + cuda_ctx + .copy_buffer_to_texture(&cuda_buf.buffer, &texture, width, height, texture_format) + .map_err(|e| { + ProcessingError::CudaError(format!("Buffer-to-texture copy failed: {e}")) + })?; + + Ok(()) + }) +} + +pub fn cuda_write_back(entity: Entity) -> Result<()> { + app_mut(|app| { + let (texture, _, _, _) = resolve_texture(app, entity)?; + + let cuda_buf = app + .world() + .get::(entity) + .ok_or(ProcessingError::ImageNotFound)?; + + let cuda_ctx = cuda_ctx(app.world())?; + + cuda_ctx + .copy_buffer_to_texture( + &cuda_buf.buffer, + &texture, + cuda_buf.width, + cuda_buf.height, + cuda_buf.texture_format, + ) + .map_err(|e| { + ProcessingError::CudaError(format!("Buffer-to-texture copy failed: {e}")) + })?; + + Ok(()) + }) +} + +pub struct CudaBufferInfo { + pub device_ptr: u64, + pub width: u32, + pub height: u32, + pub texture_format: TextureFormat, +} + +pub fn cuda_buffer(entity: Entity) -> Result { + app_mut(|app| { + let cuda_buf = app + .world() + .get::(entity) + .ok_or(ProcessingError::ImageNotFound)?; + Ok(CudaBufferInfo { + device_ptr: cuda_buf.buffer.device_ptr(), + width: cuda_buf.width, + height: cuda_buf.height, + texture_format: cuda_buf.texture_format, + }) + }) +} + +pub fn typestr_for_format(format: TextureFormat) -> Result<&'static str> { + match format { + TextureFormat::Rgba8Unorm | TextureFormat::Rgba8UnormSrgb => Ok("|u1"), + TextureFormat::Rgba16Float => Ok(" Ok(" Err(ProcessingError::UnsupportedTextureFormat), + } +} + +pub fn elem_size_for_typestr(typestr: &str) -> Result { + match typestr { + "|u1" => Ok(1), + " Ok(2), + " Ok(4), + _ => Err(ProcessingError::CudaError(format!( + "unsupported typestr: {typestr}" + ))), + } +} diff --git a/crates/processing_pyo3/Cargo.toml b/crates/processing_pyo3/Cargo.toml index 9660e34..adddc4f 100644 --- a/crates/processing_pyo3/Cargo.toml +++ b/crates/processing_pyo3/Cargo.toml @@ -16,11 +16,13 @@ wayland = ["processing/wayland", "processing_glfw/wayland"] static-link = ["processing_glfw/static-link"] x11 = ["processing/x11"] webcam = ["processing/webcam", "dep:processing_webcam"] +cuda = ["dep:processing_cuda", "processing/cuda"] [dependencies] -pyo3 = { workspace = true, features = ["experimental-inspect"] } +pyo3 = { workspace = true, features = ["experimental-inspect", "multiple-pymethods"] } processing = { workspace = true } processing_webcam = { workspace = true, optional = true } processing_glfw = { workspace = true } bevy = { workspace = true, features = ["file_watcher"] } png = "0.18" +processing_cuda = { workspace = true, optional = true } diff --git a/crates/processing_pyo3/examples/cuda.py b/crates/processing_pyo3/examples/cuda.py new file mode 100644 index 0000000..817f07b --- /dev/null +++ b/crates/processing_pyo3/examples/cuda.py @@ -0,0 +1,40 @@ +from mewnala import * +import torch +import torch.nn.functional as F +import math + +W, H = 512, 512 +KERNEL_SIZE = 15 +sigma = 4.0 + +ax = torch.arange(-KERNEL_SIZE // 2 + 1.0, KERNEL_SIZE // 2 + 1.0, device="cuda") +xx, yy = torch.meshgrid(ax, ax, indexing="ij") +kernel = torch.exp(-(xx**2 + yy**2) / (2.0 * sigma**2)) +kernel = kernel / kernel.sum() +BLUR = kernel.unsqueeze(0).unsqueeze(0).repeat(4, 1, 1, 1) + +img = None + +def setup(): + global img + size(W, H) + img = create_image(W, H) + flush() + +def draw(): + t = frame_count * 0.02 + + no_stroke() + fill(255) + circle(W / 2 + math.cos(t) * 150, H / 2 + math.sin(t) * 150, 60) + + flush() + + tensor = torch.as_tensor(cuda(), device="cuda") + t_img = tensor.permute(2, 0, 1).unsqueeze(0).float() + blurred = F.conv2d(t_img, BLUR, padding=KERNEL_SIZE // 2, groups=4) + result = (blurred.squeeze(0).permute(1, 2, 0).clamp(0, 1) * 255).to(torch.uint8).contiguous() + img.update_from(result) + background(img) + +run() diff --git a/crates/processing_pyo3/pyproject.toml b/crates/processing_pyo3/pyproject.toml index c3347da..4e2db88 100644 --- a/crates/processing_pyo3/pyproject.toml +++ b/crates/processing_pyo3/pyproject.toml @@ -49,8 +49,9 @@ include = [ cache-keys = [ { file = "pyproject.toml" }, { file = "Cargo.toml" }, + { file = "../../Cargo.toml" }, { file = "src/**/*.rs" }, { file = "../../src/**/*.rs" }, - { file = "../processing_render/src/**/*.rs" }, - { file = "../processing_render/Cargo.toml" }, + { file = "../*/src/**/*.rs" }, + { file = "../*/Cargo.toml" }, ] diff --git a/crates/processing_pyo3/src/cuda.rs b/crates/processing_pyo3/src/cuda.rs new file mode 100644 index 0000000..e8064c2 --- /dev/null +++ b/crates/processing_pyo3/src/cuda.rs @@ -0,0 +1,59 @@ +use processing::prelude::image::pixel_size; +use processing_cuda::{cuda_buffer, cuda_write_back, typestr_for_format}; +use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyDict}; + +/// Implements `__cuda_array_interface__` for zero-copy interop +/// with PyTorch, CuPy, and other CUDA-aware Python libraries. +#[pyclass(unsendable)] +pub struct CudaImage { + entity: bevy::prelude::Entity, +} + +impl CudaImage { + pub fn new(entity: bevy::prelude::Entity) -> Self { + Self { entity } + } +} + +#[pymethods] +impl CudaImage { + pub fn sync(&self) -> PyResult<()> { + cuda_write_back(self.entity).map_err(|e| PyRuntimeError::new_err(format!("{e}"))) + } + + #[getter] + pub fn shape(&self) -> PyResult<(u32, u32, u32)> { + let info = cuda_buffer(self.entity).map_err(|e| PyRuntimeError::new_err(format!("{e}")))?; + Ok((info.height, info.width, 4)) + } + + #[getter] + pub fn device_ptr(&self) -> PyResult { + let info = cuda_buffer(self.entity).map_err(|e| PyRuntimeError::new_err(format!("{e}")))?; + Ok(info.device_ptr) + } + + #[getter] + pub fn __cuda_array_interface__<'py>(&self, py: Python<'py>) -> PyResult> { + let info = cuda_buffer(self.entity).map_err(|e| PyRuntimeError::new_err(format!("{e}")))?; + + let typestr = typestr_for_format(info.texture_format) + .map_err(|e| PyRuntimeError::new_err(format!("{e}")))?; + let px_size = + pixel_size(info.texture_format).map_err(|e| PyRuntimeError::new_err(format!("{e}")))?; + + let height = info.height as usize; + let width = info.width as usize; + let channels: usize = 4; + let elem_size = px_size / channels; + + let dict = PyDict::new(py); + dict.set_item("data", (info.device_ptr, false))?; + dict.set_item("shape", (height, width, channels))?; + dict.set_item("typestr", typestr)?; + dict.set_item("strides", (width * px_size, px_size, elem_size))?; + dict.set_item("version", 3)?; + + Ok(dict) + } +} diff --git a/crates/processing_pyo3/src/graphics.rs b/crates/processing_pyo3/src/graphics.rs index 1107fde..1d4ce31 100644 --- a/crates/processing_pyo3/src/graphics.rs +++ b/crates/processing_pyo3/src/graphics.rs @@ -6,7 +6,7 @@ use bevy::{ color::{ColorToPacked, Srgba}, math::Vec4, prelude::Entity, - render::render_resource::TextureFormat, + render::render_resource::{Extent3d, TextureFormat}, }; use processing::prelude::*; use pyo3::{ @@ -15,6 +15,43 @@ use pyo3::{ types::{PyDict, PyTuple}, }; +#[cfg(feature = "cuda")] +use crate::cuda::CudaImage; + +#[cfg(feature = "cuda")] +fn cuda_import_from_interface( + entity: bevy::prelude::Entity, + obj: &pyo3::Bound<'_, pyo3::PyAny>, +) -> PyResult<()> { + let interface = obj + .getattr("__cuda_array_interface__")? + .cast_into::()?; + + let data_tuple: (u64, bool) = interface + .get_item("data")? + .ok_or_else(|| PyRuntimeError::new_err("missing 'data' in __cuda_array_interface__"))? + .extract()?; + let src_ptr = data_tuple.0; + + let shape: Vec = interface + .get_item("shape")? + .ok_or_else(|| PyRuntimeError::new_err("missing 'shape' in __cuda_array_interface__"))? + .extract()?; + + let typestr: String = interface + .get_item("typestr")? + .ok_or_else(|| PyRuntimeError::new_err("missing 'typestr' in __cuda_array_interface__"))? + .extract()?; + + let elem_size = processing_cuda::elem_size_for_typestr(&typestr) + .map_err(|e| PyRuntimeError::new_err(format!("{e}")))?; + let total_elements: usize = shape.iter().product(); + let byte_size = (total_elements * elem_size) as u64; + + processing_cuda::cuda_import(entity, src_ptr, byte_size) + .map_err(|e| PyRuntimeError::new_err(format!("{e}"))) +} + #[pyclass(name = "BlendMode", from_py_object)] #[derive(Clone)] pub struct PyBlendMode { @@ -164,6 +201,20 @@ impl Drop for Image { } } +#[cfg(feature = "cuda")] +#[pymethods] +impl Image { + pub fn cuda(&self) -> PyResult { + processing_cuda::cuda_export(self.entity) + .map_err(|e| PyRuntimeError::new_err(format!("{e}")))?; + Ok(CudaImage::new(self.entity)) + } + + pub fn update_from(&self, obj: &Bound<'_, pyo3::PyAny>) -> PyResult<()> { + cuda_import_from_interface(self.entity, obj) + } +} + #[pyclass(unsendable)] pub struct Geometry { pub(crate) entity: Entity, @@ -254,8 +305,6 @@ impl Geometry { pub struct Graphics { pub(crate) entity: Entity, pub surface: Surface, - pub width: u32, - pub height: u32, } impl Drop for Graphics { @@ -302,8 +351,6 @@ impl Graphics { Ok(Self { entity: graphics, surface, - width, - height, }) } @@ -339,8 +386,6 @@ impl Graphics { Ok(Self { entity: graphics, surface, - width, - height, }) } @@ -739,6 +784,18 @@ impl Graphics { } } + pub fn create_image(&self, width: u32, height: u32) -> PyResult { + let size = Extent3d { + width, + height, + depth_or_array_layers: 1, + }; + let data = vec![0u8; (width * height * 4) as usize]; + let entity = image_create(size, data, TextureFormat::Rgba8UnormSrgb) + .map_err(|e| PyRuntimeError::new_err(format!("{e}")))?; + Ok(Image { entity }) + } + pub fn push_matrix(&self) -> PyResult<()> { graphics_record_command(self.entity, DrawCommand::PushMatrix) .map_err(|e| PyRuntimeError::new_err(format!("{e}"))) @@ -980,6 +1037,10 @@ impl Graphics { .map_err(|e| PyRuntimeError::new_err(format!("{e}"))) } + pub fn flush(&self) -> PyResult<()> { + graphics_flush(self.entity).map_err(|e| PyRuntimeError::new_err(format!("{e}"))) + } + pub fn begin_draw(&self) -> PyResult<()> { graphics_begin_draw(self.entity).map_err(|e| PyRuntimeError::new_err(format!("{e}"))) } @@ -1195,6 +1256,20 @@ impl Graphics { } } +#[cfg(feature = "cuda")] +#[pymethods] +impl Graphics { + pub fn cuda(&self) -> PyResult { + processing_cuda::cuda_export(self.entity) + .map_err(|e| PyRuntimeError::new_err(format!("{e}")))?; + Ok(CudaImage::new(self.entity)) + } + + pub fn update_from(&self, obj: &Bound<'_, pyo3::PyAny>) -> PyResult<()> { + cuda_import_from_interface(self.entity, obj) + } +} + pub fn get_graphics<'py>(module: &Bound<'py, PyModule>) -> PyResult>> { let Ok(attr) = module.getattr("_graphics") else { return Ok(None); diff --git a/crates/processing_pyo3/src/lib.rs b/crates/processing_pyo3/src/lib.rs index ffd56bb..71362ed 100644 --- a/crates/processing_pyo3/src/lib.rs +++ b/crates/processing_pyo3/src/lib.rs @@ -9,6 +9,8 @@ //! To allow Python users to create a similar experience, we provide module-level //! functions that forward to a singleton Graphics object pub(crate) behind the scenes. pub(crate) mod color; +#[cfg(feature = "cuda")] +pub(crate) mod cuda; mod glfw; mod gltf; mod graphics; @@ -134,6 +136,9 @@ mod mewnala { use super::Shader; #[pymodule_export] use super::Topology; + #[cfg(feature = "cuda")] + #[pymodule_export] + use super::cuda::CudaImage; // Stroke cap/join #[pymodule_export] @@ -565,6 +570,31 @@ mod mewnala { graphics.readback_png().map(Some) } + #[pyfunction] + #[pyo3(pass_module)] + fn flush(module: &Bound<'_, PyModule>) -> PyResult<()> { + graphics!(module).flush() + } + + #[cfg(feature = "cuda")] + #[pyfunction] + #[pyo3(pass_module)] + fn cuda(module: &Bound<'_, PyModule>) -> PyResult { + let graphics = + get_graphics(module)?.ok_or_else(|| PyRuntimeError::new_err("call size() first"))?; + graphics.cuda() + } + + #[cfg(feature = "cuda")] + #[pyfunction] + #[pyo3(pass_module)] + fn update_graphics_from( + module: &Bound<'_, PyModule>, + obj: &Bound<'_, pyo3::PyAny>, + ) -> PyResult<()> { + graphics!(module).update_from(obj) + } + #[pyfunction] #[pyo3(pass_module)] fn redraw(module: &Bound<'_, PyModule>) -> PyResult<()> { @@ -673,13 +703,12 @@ mod mewnala { // call setup setup_fn.call0()?; + let mut frame_count: u64 = 0; { let graphics = get_graphics(module)? .ok_or_else(|| PyRuntimeError::new_err("call size() first"))?; input::sync_globals(&draw_fn, graphics.surface.entity)?; } - - // start draw loop loop { { let mut graphics = get_graphics_mut(module)? @@ -694,18 +723,13 @@ mod mewnala { let cstr: &CStr = ok.as_c_str(); match py.run(cstr, None, Some(&locals)) { - Ok(_) => { - dbg!("Success of any kind?"); - } + Ok(_) => {} Err(e) => { - dbg!(e); + eprintln!("sketch reload error: {e}"); } } - // setup_fn = locals.get_item("setup").unwrap().unwrap(); draw_fn = locals.get_item("draw").unwrap().unwrap(); - - dbg!(locals); } if !graphics.surface.poll_events() { @@ -718,6 +742,9 @@ mod mewnala { let graphics = get_graphics(module)? .ok_or_else(|| PyRuntimeError::new_err("call size() first"))?; input::sync_globals(&draw_fn, graphics.surface.entity)?; + let globals = draw_fn.getattr("__globals__")?; + globals.set_item("frame_count", frame_count)?; + frame_count += 1; } draw_fn @@ -967,6 +994,14 @@ mod mewnala { graphics.image(image_file) } + #[pyfunction] + #[pyo3(pass_module)] + fn create_image(module: &Bound<'_, PyModule>, width: u32, height: u32) -> PyResult { + let graphics = + get_graphics(module)?.ok_or_else(|| PyRuntimeError::new_err("call size() first"))?; + graphics.create_image(width, height) + } + #[pyfunction] #[pyo3(pass_module)] fn create_directional_light( diff --git a/crates/processing_render/src/graphics.rs b/crates/processing_render/src/graphics.rs index 1038d5a..11bae9d 100644 --- a/crates/processing_render/src/graphics.rs +++ b/crates/processing_render/src/graphics.rs @@ -9,18 +9,18 @@ use bevy::{ ImageRenderTarget, MsaaWriteback, Projection, RenderTarget, visibility::RenderLayers, }, core_pipeline::tonemapping::Tonemapping, - ecs::{entity::EntityHashMap, query::QueryEntityError}, + ecs::query::QueryEntityError, math::{Mat4, Vec3A}, prelude::*, render::{ - Render, RenderSystems, + RenderApp, render_resource::{ CommandEncoderDescriptor, Extent3d, MapMode, Origin3d, PollType, TexelCopyBufferInfo, - TexelCopyBufferLayout, TexelCopyTextureInfo, TextureFormat, TextureUsages, + TexelCopyBufferLayout, TexelCopyTextureInfo, Texture, TextureFormat, TextureUsages, }, renderer::{RenderDevice, RenderQueue}, sync_world::MainEntity, - view::{ViewTarget, prepare_view_targets}, + view::ViewTarget, }, window::WindowRef, }; @@ -41,21 +41,6 @@ pub struct GraphicsPlugin; impl Plugin for GraphicsPlugin { fn build(&self, app: &mut App) { app.init_resource::(); - - let (tx, rx) = crossbeam_channel::unbounded::<(Entity, ViewTarget)>(); - app.init_resource::() - .insert_resource(GraphicsTargetReceiver(rx)) - .add_systems(First, update_view_targets); - - let render_app = app.sub_app_mut(bevy::render::RenderApp); - render_app - .add_systems( - Render, - send_view_targets - .in_set(RenderSystems::PrepareViews) - .after(prepare_view_targets), - ) - .insert_resource(GraphicsTargetSender(tx)); } } @@ -66,36 +51,15 @@ pub struct Graphics { pub size: Extent3d, } -// We store a mapping of graphics target entities to their GPU `ViewTarget`s. In the -// Processing API, graphics *are* images, so we need to be able to look up the `ViewTarget` for a -// given graphics entity when referencing it as an image. -#[derive(Resource, Deref, DerefMut, Default)] -pub struct GraphicsTargets(EntityHashMap); - -#[derive(Resource, Deref, DerefMut)] -pub struct GraphicsTargetReceiver(crossbeam_channel::Receiver<(Entity, ViewTarget)>); - -#[derive(Resource, Deref, DerefMut)] -pub struct GraphicsTargetSender(crossbeam_channel::Sender<(Entity, ViewTarget)>); - -fn send_view_targets( - view_targets: Query<(MainEntity, &ViewTarget), Changed>, - sender: Res, -) { - for (main_entity, view_target) in view_targets.iter() { - sender - .send((main_entity, view_target.clone())) - .expect("Failed to send updated view target"); - } -} - -pub fn update_view_targets( - mut graphics_targets: ResMut, - receiver: Res, -) { - while let Ok((entity, view_target)) = receiver.0.try_recv() { - graphics_targets.insert(entity, view_target); +pub fn view_target(app: &mut App, entity: Entity) -> Result<&ViewTarget> { + let rw = app.sub_app_mut(RenderApp).world_mut(); + let mut query = rw.query::<(&MainEntity, &ViewTarget)>(); + for (main_entity, vt) in query.iter(rw) { + if **main_entity == entity { + return Ok(vt); + } } + Err(ProcessingError::GraphicsNotFound) } macro_rules! graphics_mut { @@ -440,10 +404,6 @@ pub fn flush(app: &mut App, entity: Entity) -> Result<()> { graphics_mut!(app, entity).insert(Flush); app.update(); graphics_mut!(app, entity).remove::(); - // ensure graphics targets are available immediately after flush - app.world_mut() - .run_system_cached(update_view_targets) - .expect("Failed to run update_view_targets"); Ok(()) } @@ -489,9 +449,8 @@ pub struct ReadbackData { } pub fn readback_raw( - In(entity): In, + In((entity, texture)): In<(Entity, Texture)>, graphics_query: Query<&Graphics>, - graphics_targets: Res, render_device: Res, render_queue: Res, ) -> Result { @@ -499,12 +458,6 @@ pub fn readback_raw( .get(entity) .map_err(|_| ProcessingError::GraphicsNotFound)?; - let view_target = graphics_targets - .get(&entity) - .ok_or(ProcessingError::GraphicsNotFound)?; - - let texture = view_target.main_texture(); - let mut encoder = render_device.create_command_encoder(&CommandEncoderDescriptor::default()); let px_size = pixel_size(graphics.texture_format)?; @@ -570,8 +523,9 @@ pub fn readback_raw( } pub fn update_region_write( - In((entity, x, y, width, height, data, px_size)): In<( + In((entity, texture, x, y, width, height, data, px_size)): In<( Entity, + Texture, u32, u32, u32, @@ -580,7 +534,6 @@ pub fn update_region_write( u32, )>, graphics_query: Query<&Graphics>, - graphics_targets: Res, render_queue: Res, ) -> Result<()> { let graphics = graphics_query @@ -594,17 +547,11 @@ pub fn update_region_write( x, y, width, height, graphics.size.width, graphics.size.height ))); } - - let view_target = graphics_targets - .get(&entity) - .ok_or(ProcessingError::GraphicsNotFound)?; - - let texture = view_target.main_texture(); let bytes_per_row = width * px_size; render_queue.write_texture( TexelCopyTextureInfo { - texture, + texture: &texture, mip_level: 0, origin: Origin3d { x, y, z: 0 }, aspect: Default::default(), diff --git a/crates/processing_render/src/image.rs b/crates/processing_render/src/image.rs index 60f64c0..9a4d558 100644 --- a/crates/processing_render/src/image.rs +++ b/crates/processing_render/src/image.rs @@ -8,11 +8,11 @@ use bevy::{ AssetPath, LoadState, RenderAssetUsages, handle_internal_asset_events, io::{AssetSourceId, embedded::GetAssetServer}, }, - ecs::{entity::EntityHashMap, system::RunSystemOnce}, + ecs::system::RunSystemOnce, prelude::*, render::{ - ExtractSchedule, MainWorld, - render_asset::{AssetExtractionSystems, RenderAssets}, + RenderApp, + render_asset::RenderAssets, render_resource::{ Buffer, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, Extent3d, MapMode, Origin3d, PollType, TexelCopyBufferInfo, TexelCopyBufferLayout, TexelCopyTextureInfo, @@ -30,21 +30,9 @@ use processing_core::error::{ProcessingError, Result}; pub struct ImagePlugin; impl Plugin for ImagePlugin { - fn build(&self, app: &mut App) { - app.init_resource::(); - - let render_app = app.sub_app_mut(bevy::render::RenderApp); - render_app.add_systems(ExtractSchedule, sync_textures.after(AssetExtractionSystems)); - } + fn build(&self, _app: &mut App) {} } -// In Bevy, `Image` is a `RenderResource`, which means its descriptor is stored in the main world -// but its GPU texture is stored in the render world. To avoid tedious lookups or the need to -// explicitly reference the render world, we store a mapping of `PImage` entities to their -// corresponding GPU `Texture` in the main world. This is as bit hacky, but it simplifies the API. -#[derive(Resource, Deref, DerefMut, Default)] -pub struct ImageTextures(EntityHashMap); - #[derive(Component)] pub struct Image { pub handle: Handle, @@ -53,17 +41,6 @@ pub struct Image { pub size: Extent3d, } -fn sync_textures(mut main_world: ResMut, gpu_images: Res>) { - main_world.resource_scope(|world, mut p_image_textures: Mut| { - let mut p_images = world.query_filtered::<(Entity, &Image), Changed>(); - for (entity, p_image) in p_images.iter(world) { - if let Some(gpu_image) = gpu_images.get(&p_image.handle) { - p_image_textures.insert(entity, gpu_image.texture.clone()); - } - } - }); -} - pub fn create( In((size, data, texture_format)): In<(Extent3d, Vec, TextureFormat)>, mut commands: Commands, @@ -208,9 +185,8 @@ pub fn resize( } pub fn readback( - In(entity): In, + In((entity, texture)): In<(Entity, Texture)>, p_images: Query<&Image>, - p_image_textures: Res, mut images: ResMut>, render_device: Res, render_queue: ResMut, @@ -218,9 +194,6 @@ pub fn readback( let p_image = p_images .get(entity) .map_err(|_| ProcessingError::ImageNotFound)?; - let texture = p_image_textures - .get(&entity) - .ok_or(ProcessingError::ImageNotFound)?; let mut encoder = render_device.create_command_encoder(&CommandEncoderDescriptor::default()); @@ -281,8 +254,9 @@ pub fn readback( } pub fn update_region_write( - In((entity, x, y, width, height, data, px_size)): In<( + In((entity, texture, x, y, width, height, data, px_size)): In<( Entity, + Texture, u32, u32, u32, @@ -291,7 +265,6 @@ pub fn update_region_write( u32, )>, p_images: Query<&Image>, - p_image_textures: Res, render_queue: Res, ) -> Result<()> { let p_image = p_images @@ -305,15 +278,11 @@ pub fn update_region_write( ))); } - let texture = p_image_textures - .get(&entity) - .ok_or(ProcessingError::ImageNotFound)?; - let bytes_per_row = width * px_size; render_queue.write_texture( TexelCopyTextureInfo { - texture, + texture: &texture, mip_level: 0, origin: Origin3d { x, y, z: 0 }, aspect: Default::default(), @@ -365,13 +334,11 @@ pub fn destroy( In(entity): In, mut commands: Commands, p_images: Query<&Image>, - mut p_image_textures: ResMut, ) -> Result<()> { p_images .get(entity) .map_err(|_| ProcessingError::ImageNotFound)?; - p_image_textures.remove(&entity); commands.entity(entity).despawn(); Ok(()) } @@ -498,3 +465,18 @@ pub fn create_readback_buffer( mapped_at_creation: false, })) } + +pub fn gpu_image(app: &mut App, entity: Entity) -> Result<&GpuImage> { + let handle = app + .world() + .get::(entity) + .ok_or(ProcessingError::ImageNotFound)? + .handle + .clone(); + let render_world = app.sub_app(RenderApp).world(); + let gpu_images = render_world.resource::>(); + let gpu_image = gpu_images + .get(&handle) + .ok_or(ProcessingError::ImageNotFound)?; + Ok(gpu_image) +} diff --git a/crates/processing_render/src/lib.rs b/crates/processing_render/src/lib.rs index 11ceecd..0920067 100644 --- a/crates/processing_render/src/lib.rs +++ b/crates/processing_render/src/lib.rs @@ -4,7 +4,7 @@ pub mod camera; pub mod color; pub mod geometry; pub mod gltf; -mod graphics; +pub mod graphics; pub mod image; pub mod light; pub mod material; @@ -26,6 +26,7 @@ use processing_core::error; use crate::geometry::{AttributeFormat, AttributeValue}; use crate::graphics::flush; +use crate::image::gpu_image; use crate::render::command::DrawCommand; #[derive(Component)] @@ -302,8 +303,10 @@ pub fn graphics_destroy(graphics_entity: Entity) -> error::Result<()> { pub fn graphics_readback_raw(graphics_entity: Entity) -> error::Result { app_mut(|app| { graphics::flush(app, graphics_entity)?; + let vt = graphics::view_target(app, graphics_entity)?; + let texture = vt.main_texture().clone(); app.world_mut() - .run_system_cached_with(graphics::readback_raw, graphics_entity) + .run_system_cached_with(graphics::readback_raw, (graphics_entity, texture)) .unwrap() }) } @@ -325,6 +328,8 @@ pub fn graphics_readback(graphics_entity: Entity) -> error::Result error::Result<()> { app_mut(|app| { + let vt = graphics::view_target(app, graphics_entity)?; + let texture = vt.main_texture().clone(); let world = app.world_mut(); let size = world .get::(graphics_entity) @@ -342,6 +347,7 @@ pub fn graphics_update(graphics_entity: Entity, pixels: &[LinearRgba]) -> error: graphics::update_region_write, ( graphics_entity, + texture, 0, 0, size.width, @@ -364,13 +370,15 @@ pub fn graphics_update_region( pixels: &[LinearRgba], ) -> error::Result<()> { app_mut(|app| { + let vt = graphics::view_target(app, graphics_entity)?; + let texture = vt.main_texture().clone(); let world = app.world_mut(); let (data, px_size) = graphics::prepare_update_region(world, graphics_entity, width, height, pixels)?; world .run_system_cached_with( graphics::update_region_write, - (graphics_entity, x, y, width, height, data, px_size), + (graphics_entity, texture, x, y, width, height, data, px_size), ) .unwrap() }) @@ -733,8 +741,9 @@ pub fn image_resize(entity: Entity, new_size: Extent3d) -> error::Result<()> { /// Read back image data from GPU to CPU. pub fn image_readback(entity: Entity) -> error::Result> { app_mut(|app| { + let texture = gpu_image(app, entity)?.texture.clone(); app.world_mut() - .run_system_cached_with(image::readback, entity) + .run_system_cached_with(image::readback, (entity, texture)) .unwrap() }) } @@ -742,6 +751,7 @@ pub fn image_readback(entity: Entity) -> error::Result> { /// Update an existing image with new pixel data. pub fn image_update(entity: Entity, pixels: &[LinearRgba]) -> error::Result<()> { app_mut(|app| { + let texture = gpu_image(app, entity)?.texture.clone(); let world = app.world_mut(); let size = world .get::(entity) @@ -752,7 +762,16 @@ pub fn image_update(entity: Entity, pixels: &[LinearRgba]) -> error::Result<()> world .run_system_cached_with( image::update_region_write, - (entity, 0, 0, size.width, size.height, data, px_size), + ( + entity, + texture, + 0, + 0, + size.width, + size.height, + data, + px_size, + ), ) .unwrap() }) @@ -768,12 +787,13 @@ pub fn image_update_region( pixels: &[LinearRgba], ) -> error::Result<()> { app_mut(|app| { + let texture = gpu_image(app, entity)?.texture.clone(); let world = app.world_mut(); let (data, px_size) = image::prepare_update_region(world, entity, width, height, pixels)?; world .run_system_cached_with( image::update_region_write, - (entity, x, y, width, height, data, px_size), + (entity, texture, x, y, width, height, data, px_size), ) .unwrap() }) diff --git a/crates/processing_render/src/material/custom.rs b/crates/processing_render/src/material/custom.rs index 608c047..04080b8 100644 --- a/crates/processing_render/src/material/custom.rs +++ b/crates/processing_render/src/material/custom.rs @@ -403,8 +403,13 @@ impl ErasedRenderAsset for CustomMaterial { let bind_group_layout = BindGroupLayoutDescriptor::new("custom_material_bind_group", &layout_entries); - let bindings = - reflection.create_bindings(3, &source_asset.shader, render_device, gpu_images, gpu_buffers); + let bindings = reflection.create_bindings( + 3, + &source_asset.shader, + render_device, + gpu_images, + gpu_buffers, + ); let unprepared = UnpreparedBindGroup { bindings: BindingResources(bindings), diff --git a/crates/processing_render/src/surface.rs b/crates/processing_render/src/surface.rs index afeee6f..bbda9d5 100644 --- a/crates/processing_render/src/surface.rs +++ b/crates/processing_render/src/surface.rs @@ -35,7 +35,7 @@ use processing_core::error::{self, ProcessingError, Result}; #[cfg(not(target_os = "windows"))] use std::ptr::NonNull; -use crate::image::{Image, ImageTextures}; +use crate::image::Image; #[derive(Component, Debug, Clone)] pub struct Surface; @@ -341,12 +341,10 @@ pub fn destroy( mut commands: Commands, p_images: Query<&Image, With>, mut images: ResMut>, - mut p_image_textures: ResMut, ) -> Result<()> { match p_images.get(surface_entity) { Ok(p_image) => { images.remove(&p_image.handle); - p_image_textures.remove(&surface_entity); commands.entity(surface_entity).despawn(); Ok(()) } diff --git a/justfile b/justfile index 9d65646..cfc65d6 100644 --- a/justfile +++ b/justfile @@ -5,20 +5,20 @@ export PROCESSING_ASSET_ROOT := canonicalize("./assets") default: @just --list -py-build: - cd crates/processing_pyo3; uv run maturin develop --release +py-build *args: + cd crates/processing_pyo3; uv run maturin develop --release {{args}} py-stubs: py-build cargo run --release -p generate_stubs -py-run file: py-build +py-run file *args: (py-build args) cd crates/processing_pyo3; uv run python ./examples/{{file}} py-jupyter file: py-build cd crates/processing_pyo3; uv run jupyter notebook ./examples/{{file}} py-ipython: py-build - cd crates/processing_pyo3; ipython + cd crates/processing_pyo3; uv run ipython wasm-build: wasm-pack build crates/processing_wasm --target web --out-dir ../../target/wasm diff --git a/src/lib.rs b/src/lib.rs index b71988f..55e3678 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,6 +46,9 @@ fn create_app(config: Config) -> App { app.add_plugins(processing_input::InputPlugin); app.add_plugins(processing_render::ProcessingRenderPlugin); + #[cfg(feature = "cuda")] + app.add_plugins(processing_cuda::CudaPlugin); + #[cfg(feature = "webcam")] app.add_plugins(processing_webcam::WebcamPlugin); @@ -128,6 +131,9 @@ pub fn exit(exit_code: u8) -> error::Result<()> { Ok(()) } +#[cfg(feature = "cuda")] +pub use processing_cuda; + fn setup_tracing(log_level: Option<&str>) -> error::Result<()> { // TODO: figure out wasm compatible tracing subscriber #[cfg(not(target_arch = "wasm32"))]