feat: implement MediaProjection screenshot capture

- Add ScreenshotManager singleton for coordinating screenshot capture
- Implement MediaProjection-based screen capture with user consent flow
- Add screenshot command handler to DirectGateway
- Add UI for screenshot permission in MainActivity
- Auto-scale images to max 1920px to save bandwidth
- Return base64-encoded PNG via command response

Remaining TODO: None - screenshot feature complete pending testing
This commit is contained in:
James (ClawdBot) 2026-03-02 11:03:40 +00:00
parent 585f921601
commit 415703665d
6 changed files with 368 additions and 1 deletions

View File

@ -9,6 +9,7 @@
<!-- Foreground service --> <!-- Foreground service -->
<uses-permission android:name="android.permission.FOREGROUND_SERVICE" /> <uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_SPECIAL_USE" /> <uses-permission android:name="android.permission.FOREGROUND_SERVICE_SPECIAL_USE" />
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MEDIA_PROJECTION" />
<uses-permission android:name="android.permission.POST_NOTIFICATIONS" /> <uses-permission android:name="android.permission.POST_NOTIFICATIONS" />
<uses-permission android:name="android.permission.WAKE_LOCK" /> <uses-permission android:name="android.permission.WAKE_LOCK" />

View File

@ -6,6 +6,7 @@ import android.app.NotificationManager
import android.os.Build import android.os.Build
import com.inou.clawdnode.debug.DebugClient import com.inou.clawdnode.debug.DebugClient
import com.inou.clawdnode.gateway.DirectGateway import com.inou.clawdnode.gateway.DirectGateway
import com.inou.clawdnode.screenshot.ScreenshotManager
import com.inou.clawdnode.security.AuditLog import com.inou.clawdnode.security.AuditLog
import com.inou.clawdnode.security.DeviceIdentity import com.inou.clawdnode.security.DeviceIdentity
import com.inou.clawdnode.security.TokenStore import com.inou.clawdnode.security.TokenStore
@ -34,6 +35,9 @@ class ClawdNodeApp : Application() {
tokenStore = TokenStore(this) tokenStore = TokenStore(this)
auditLog = AuditLog(this) auditLog = AuditLog(this)
// Initialize screenshot capture
ScreenshotManager.init(this)
// Create notification channels // Create notification channels
createNotificationChannels() createNotificationChannels()

View File

@ -3,6 +3,7 @@ package com.inou.clawdnode.gateway
import android.util.Log import android.util.Log
import com.inou.clawdnode.ClawdNodeApp import com.inou.clawdnode.ClawdNodeApp
import com.inou.clawdnode.debug.DebugClient import com.inou.clawdnode.debug.DebugClient
import com.inou.clawdnode.screenshot.ScreenshotManager
import com.inou.clawdnode.sms.SmsProvider import com.inou.clawdnode.sms.SmsProvider
import kotlinx.coroutines.* import kotlinx.coroutines.*
import okhttp3.* import okhttp3.*
@ -310,6 +311,30 @@ object DirectGateway {
ClawdNodeApp.instance.auditLog.log("COMMAND_EXECUTED", "sms.send to=$to") ClawdNodeApp.instance.auditLog.log("COMMAND_EXECUTED", "sms.send to=$to")
} }
"screenshot" -> {
Log.i(TAG, "Taking screenshot")
if (!ScreenshotManager.hasPermission()) {
sendResponse(commandId, false, "MediaProjection permission not granted. Open app to enable.")
return
}
ScreenshotManager.capture { result ->
result.fold(
onSuccess = { screenshot ->
sendDataResponse(commandId, JSONObject().apply {
put("width", screenshot.width)
put("height", screenshot.height)
put("base64", screenshot.base64)
})
},
onFailure = { error ->
sendResponse(commandId, false, error.message ?: "Screenshot failed")
}
)
}
ClawdNodeApp.instance.auditLog.log("COMMAND_EXECUTED", "screenshot")
}
else -> { else -> {
Log.w(TAG, "Unknown command: $command") Log.w(TAG, "Unknown command: $command")
sendResponse(commandId, false, "Unknown command: $command") sendResponse(commandId, false, "Unknown command: $command")

View File

@ -0,0 +1,267 @@
package com.inou.clawdnode.screenshot
import android.content.Context
import android.content.Intent
import android.graphics.Bitmap
import android.graphics.PixelFormat
import android.hardware.display.DisplayManager
import android.hardware.display.VirtualDisplay
import android.media.Image
import android.media.ImageReader
import android.media.projection.MediaProjection
import android.media.projection.MediaProjectionManager
import android.os.Handler
import android.os.HandlerThread
import android.util.Base64
import android.util.DisplayMetrics
import android.util.Log
import android.view.WindowManager
import com.inou.clawdnode.ClawdNodeApp
import java.io.ByteArrayOutputStream
import java.util.concurrent.atomic.AtomicBoolean
import kotlinx.coroutines.*
/**
 * Singleton manager for screenshot capture via MediaProjection.
 *
 * MediaProjection requires user consent, so the flow is:
 * 1. App requests permission by launching the intent from [getProjectionIntent]
 * 2. User grants permission
 * 3. The consent result is stored here via [setProjectionIntent]
 * 4. When a screenshot is requested, [capture] creates the projection (once),
 *    mirrors the screen into an [ImageReader], grabs one frame and encodes it
 *
 * The stored consent is kept in memory only, so it is lost when the process dies.
 *
 * NOTE(review): this class re-uses one consent intent / one [MediaProjection] for
 * several virtual displays. The Android 14 behavior changes document the consent
 * token as single-use and allow only one `createVirtualDisplay` call per projection
 * (a `SecurityException` is thrown otherwise) — confirm behavior on the target API level.
 * NOTE(review): Android 10+ requires a running foreground service of type
 * `mediaProjection` before the projection is obtained; that service is not visible
 * from this file — confirm it is started before [capture] is called.
 *
 * Threading: frame grabbing and image encoding run on a private [HandlerThread];
 * the result callback is invoked on that thread (or on the caller's thread when
 * [capture] fails before a frame is requested).
 */
object ScreenshotManager {
    private const val TAG = "ScreenshotManager"

    /** Longest allowed edge of the returned image, in pixels (bandwidth limit). */
    private const val MAX_DIMENSION = 1920

    /** Delay before the first frame grab, giving the virtual display time to render. */
    private const val INITIAL_FRAME_DELAY_MS = 100L

    /** Delay between frame-grab retries while no frame is available yet. */
    private const val FRAME_RETRY_DELAY_MS = 50L

    /** Upper bound on frame-grab attempts so a capture can never hang forever (~2 s). */
    private const val MAX_FRAME_ATTEMPTS = 40

    private var projectionManager: MediaProjectionManager? = null
    private var mediaProjection: MediaProjection? = null
    private var projectionIntent: Intent? = null
    private var projectionResultCode: Int = 0
    private val isCapturing = AtomicBoolean(false)
    private var imageReader: ImageReader? = null
    private var virtualDisplay: VirtualDisplay? = null
    private val handlerThread = HandlerThread("ScreenshotHandler").apply { start() }
    private val handler = Handler(handlerThread.looper)

    // Callback of the capture currently in flight (at most one, guarded by isCapturing)
    private var pendingCallback: ((Result<ScreenshotResult>) -> Unit)? = null

    // Number of frame-grab attempts made for the capture currently in flight
    private var frameAttempts = 0

    // Single reusable runnable so pending frame grabs can be cancelled with removeCallbacks()
    private val frameGrabber = Runnable { captureFrame() }

    /**
     * A captured screenshot.
     *
     * @property width width of the encoded image in pixels (after down-scaling)
     * @property height height of the encoded image in pixels (after down-scaling)
     * @property base64 PNG image data, Base64-encoded without line wraps
     */
    data class ScreenshotResult(
        val width: Int,
        val height: Int,
        val base64: String
    )

    /**
     * Initialize the manager. Call from Application.onCreate().
     */
    fun init(context: Context) {
        projectionManager = context.getSystemService(Context.MEDIA_PROJECTION_SERVICE)
            as MediaProjectionManager
    }

    /**
     * Get the intent to request MediaProjection permission.
     * Launch this intent for a result and pass the result to [setProjectionIntent].
     *
     * @return the consent intent, or null if [init] has not been called
     */
    fun getProjectionIntent(): Intent? {
        return projectionManager?.createScreenCaptureIntent()
    }

    /**
     * Store the granted projection permission.
     * Call this from the activity-result callback of the consent intent.
     * A denied result is only logged; a previously stored grant is left untouched.
     */
    fun setProjectionIntent(resultCode: Int, data: Intent?) {
        if (resultCode == android.app.Activity.RESULT_OK && data != null) {
            projectionResultCode = resultCode
            projectionIntent = data
            Log.i(TAG, "MediaProjection permission granted and stored")
            ClawdNodeApp.instance.auditLog.log("SCREENSHOT_PERMISSION", "Granted")
        } else {
            Log.w(TAG, "MediaProjection permission denied")
            ClawdNodeApp.instance.auditLog.log("SCREENSHOT_PERMISSION", "Denied")
        }
    }

    /**
     * Check if we have permission to capture (i.e. a consent result is stored).
     */
    fun hasPermission(): Boolean = projectionIntent != null

    /**
     * Capture a screenshot.
     *
     * The callback is invoked exactly once with either the base64-encoded PNG or the
     * error that prevented the capture. Only one capture may be in flight at a time;
     * a concurrent request fails immediately with "Screenshot already in progress".
     */
    fun capture(callback: (Result<ScreenshotResult>) -> Unit) {
        if (!hasPermission()) {
            callback(Result.failure(IllegalStateException("MediaProjection permission not granted")))
            return
        }
        if (!isCapturing.compareAndSet(false, true)) {
            callback(Result.failure(IllegalStateException("Screenshot already in progress")))
            return
        }
        pendingCallback = callback
        frameAttempts = 0
        try {
            // Get display metrics
            val context = ClawdNodeApp.instance.applicationContext
            val windowManager = context.getSystemService(Context.WINDOW_SERVICE) as WindowManager
            val metrics = DisplayMetrics()
            @Suppress("DEPRECATION")
            windowManager.defaultDisplay.getRealMetrics(metrics)
            val width = metrics.widthPixels
            val height = metrics.heightPixels
            val density = metrics.densityDpi
            Log.d(TAG, "Capturing screenshot: ${width}x${height} @ $density dpi")

            // Create projection (if needed)
            val projection = mediaProjection ?: createProjection().also { mediaProjection = it }

            // Create ImageReader; stored in the field first so a later failure still releases it
            val reader = ImageReader.newInstance(width, height, PixelFormat.RGBA_8888, 2)
            imageReader = reader

            // Create VirtualDisplay mirroring the screen into the reader's surface
            virtualDisplay = checkNotNull(
                projection.createVirtualDisplay(
                    "ClawdNodeScreenshot",
                    width, height, density,
                    DisplayManager.VIRTUAL_DISPLAY_FLAG_AUTO_MIRROR,
                    reader.surface,
                    null, handler
                )
            ) { "Failed to create virtual display" }

            // Grab the frame after a small delay to let the display render
            handler.postDelayed(frameGrabber, INITIAL_FRAME_DELAY_MS)
        } catch (e: Exception) {
            Log.e(TAG, "Screenshot capture failed", e)
            // Releases any half-created resources and reports the error to the callback
            finishCapture(Result.failure(e))
        }
    }

    /**
     * Creates a [MediaProjection] from the stored consent and registers a stop callback
     * that releases capture resources and fails any capture still in flight.
     *
     * @throws IllegalStateException if [init] was not called, no consent is stored,
     *         or the platform returns no projection for the stored consent
     */
    private fun createProjection(): MediaProjection {
        val manager = checkNotNull(projectionManager) { "ScreenshotManager.init() was not called" }
        val consent = checkNotNull(projectionIntent) { "MediaProjection permission not granted" }
        val projection = checkNotNull(
            manager.getMediaProjection(projectionResultCode, consent.clone() as Intent)
        ) { "MediaProjection could not be created from the stored consent" }
        projection.registerCallback(object : MediaProjection.Callback() {
            override fun onStop() {
                Log.i(TAG, "MediaProjection stopped")
                cleanup()
                failPendingCapture("MediaProjection stopped before a frame was captured")
            }
        }, handler)
        return projection
    }

    /**
     * Tries to grab one frame from the [ImageReader]. Runs on the handler thread.
     * Re-schedules itself while no frame is available, up to [MAX_FRAME_ATTEMPTS] times,
     * then completes the capture with either the encoded image or an error.
     */
    private fun captureFrame() {
        // The capture may already have been completed or cancelled (e.g. projection stopped)
        if (!isCapturing.get()) return

        val outcome: Result<ScreenshotResult> = try {
            val reader = checkNotNull(imageReader) { "Capture resources were released before a frame arrived" }
            val image = reader.acquireLatestImage()
            if (image == null) {
                // No frame yet, retry briefly — but never forever
                frameAttempts++
                check(frameAttempts < MAX_FRAME_ATTEMPTS) { "Timed out waiting for a screen frame" }
                handler.postDelayed(frameGrabber, FRAME_RETRY_DELAY_MS)
                return
            }
            // Always hand the image back to the reader, even if encoding fails
            val encoded = try {
                processImage(image)
            } finally {
                image.close()
            }
            Result.success(encoded)
        } catch (e: Exception) {
            Log.e(TAG, "Frame capture failed", e)
            Result.failure(e)
        }

        finishCapture(outcome)
        outcome.onSuccess { shot ->
            Log.i(TAG, "Screenshot captured: ${shot.width}x${shot.height}")
            ClawdNodeApp.instance.auditLog.log("SCREENSHOT_CAPTURED",
                "${shot.width}x${shot.height}, ${shot.base64.length} bytes")
        }
    }

    /**
     * Converts a captured frame into a base64-encoded PNG, down-scaled so that its
     * longest edge is at most [MAX_DIMENSION] pixels.
     *
     * @throws IllegalStateException if PNG encoding fails
     */
    private fun processImage(image: Image): ScreenshotResult {
        val plane = image.planes[0]
        val pixelStride = plane.pixelStride
        // Rows may be padded beyond image.width; the bitmap must be wide enough to hold the padding
        val rowPadding = plane.rowStride - pixelStride * image.width
        val paddedWidth = image.width + rowPadding / pixelStride

        val padded = Bitmap.createBitmap(paddedWidth, image.height, Bitmap.Config.ARGB_8888)
        padded.copyPixelsFromBuffer(plane.buffer)

        // Crop to actual size if needed
        val cropped = if (paddedWidth != image.width) {
            Bitmap.createBitmap(padded, 0, 0, image.width, image.height)
        } else {
            padded
        }

        // Scale down if too large (max 1920px on longest edge for bandwidth)
        val longestEdge = maxOf(cropped.width, cropped.height)
        val scaled = if (longestEdge > MAX_DIMENSION) {
            val scale = MAX_DIMENSION.toFloat() / longestEdge
            val newWidth = (cropped.width * scale).toInt().coerceAtLeast(1)
            val newHeight = (cropped.height * scale).toInt().coerceAtLeast(1)
            Bitmap.createScaledBitmap(cropped, newWidth, newHeight, true)
        } else {
            cropped
        }

        try {
            // Convert to PNG base64. PNG is lossless, so the quality argument is ignored.
            val outputStream = ByteArrayOutputStream()
            check(scaled.compress(Bitmap.CompressFormat.PNG, 100, outputStream)) { "PNG encoding failed" }
            val base64 = Base64.encodeToString(outputStream.toByteArray(), Base64.NO_WRAP)
            // Dimensions are read here, before the bitmaps are recycled in the finally block
            return ScreenshotResult(
                width = scaled.width,
                height = scaled.height,
                base64 = base64
            )
        } finally {
            // Identity comparison: the intermediate steps may return the very same bitmap object
            if (padded !== cropped) padded.recycle()
            if (cropped !== scaled) cropped.recycle()
            scaled.recycle()
        }
    }

    /**
     * Completes the capture in flight: releases per-capture resources, resets the
     * in-progress state and delivers [result] to the pending callback (if any).
     * State is reset before the callback runs so the callback may start a new capture.
     */
    private fun finishCapture(result: Result<ScreenshotResult>) {
        cleanupCapture()
        val callback = pendingCallback
        pendingCallback = null
        isCapturing.set(false)
        callback?.invoke(result)
    }

    /** Fails the capture in flight, if there is one, with an [IllegalStateException]. */
    private fun failPendingCapture(reason: String) {
        if (isCapturing.get()) {
            finishCapture(Result.failure(IllegalStateException(reason)))
        }
    }

    /** Releases the per-capture resources and cancels any scheduled frame grab. */
    private fun cleanupCapture() {
        handler.removeCallbacks(frameGrabber)
        virtualDisplay?.release()
        virtualDisplay = null
        imageReader?.close()
        imageReader = null
    }

    /** Releases per-capture resources and forgets the current projection. */
    private fun cleanup() {
        cleanupCapture()
        mediaProjection = null
        // Don't clear intent - user consent remains valid
    }

    /**
     * Release all resources. Call when app is shutting down.
     * Stops the active projection, fails any capture in flight, drops the stored
     * consent and quits the handler thread; the manager cannot capture afterwards.
     */
    fun release() {
        // Keep a reference: cleanup() clears the field, and the projection must still be stopped
        val projection = mediaProjection
        cleanup()
        failPendingCapture("ScreenshotManager released")
        projection?.stop()
        projectionIntent = null
        handlerThread.quitSafely()
    }
}

View File

@ -18,6 +18,7 @@ import androidx.appcompat.app.AppCompatActivity
import androidx.core.content.ContextCompat import androidx.core.content.ContextCompat
import com.inou.clawdnode.ClawdNodeApp import com.inou.clawdnode.ClawdNodeApp
import com.inou.clawdnode.databinding.ActivityMainBinding import com.inou.clawdnode.databinding.ActivityMainBinding
import com.inou.clawdnode.screenshot.ScreenshotManager
import com.inou.clawdnode.service.NodeService import com.inou.clawdnode.service.NodeService
/** /**
@ -67,6 +68,13 @@ class MainActivity : AppCompatActivity() {
updatePermissionStatus() updatePermissionStatus()
} }
// Launcher for the MediaProjection consent dialog: forwards the dialog's result
// to ScreenshotManager and then refreshes the permission status shown in the UI.
private val mediaProjectionLauncher =
    registerForActivityResult(ActivityResultContracts.StartActivityForResult()) { consentResult ->
        val code = consentResult.resultCode
        val payload = consentResult.data
        ScreenshotManager.setProjectionIntent(code, payload)
        updatePermissionStatus()
    }
override fun onCreate(savedInstanceState: Bundle?) { override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState) super.onCreate(savedInstanceState)
binding = ActivityMainBinding.inflate(layoutInflater) binding = ActivityMainBinding.inflate(layoutInflater)
@ -112,6 +120,10 @@ class MainActivity : AppCompatActivity() {
requestRuntimePermissions() requestRuntimePermissions()
} }
binding.btnGrantScreenshot.setOnClickListener {
requestScreenshotPermission()
}
// Connection control // Connection control
binding.btnConnect.setOnClickListener { binding.btnConnect.setOnClickListener {
nodeService?.connect() nodeService?.connect()
@ -198,6 +210,10 @@ class MainActivity : AppCompatActivity() {
// Runtime permissions // Runtime permissions
val permissionsGranted = areRuntimePermissionsGranted() val permissionsGranted = areRuntimePermissionsGranted()
binding.tvPermissionsStatus.text = if (permissionsGranted) "✓ All granted" else "✗ Some missing" binding.tvPermissionsStatus.text = if (permissionsGranted) "✓ All granted" else "✗ Some missing"
// Screenshot/Screen capture
val screenshotEnabled = isScreenshotPermissionGranted()
binding.tvScreenshotStatus.text = if (screenshotEnabled) "✓ Granted" else "✗ Not granted"
} }
// ======================================== // ========================================
@ -244,6 +260,19 @@ class MainActivity : AppCompatActivity() {
} }
} }
/**
 * Launches the screen-capture consent dialog, or shows a toast when
 * ScreenshotManager cannot provide a consent intent.
 */
private fun requestScreenshotPermission() {
    val consentIntent = ScreenshotManager.getProjectionIntent()
    if (consentIntent == null) {
        Toast.makeText(this, "Screenshot not available", Toast.LENGTH_SHORT).show()
        return
    }
    mediaProjectionLauncher.launch(consentIntent)
}
/** Whether ScreenshotManager currently reports screen-capture permission. */
private fun isScreenshotPermissionGranted(): Boolean =
    ScreenshotManager.hasPermission()
private fun showAuditLog() { private fun showAuditLog() {
val entries = ClawdNodeApp.instance.auditLog.getRecentEntries(50) val entries = ClawdNodeApp.instance.auditLog.getRecentEntries(50)
val text = entries.joinToString("\n\n") { entry -> val text = entries.joinToString("\n\n") { entry ->

View File

@ -206,7 +206,7 @@
android:layout_height="wrap_content" android:layout_height="wrap_content"
android:orientation="horizontal" android:orientation="horizontal"
android:gravity="center_vertical" android:gravity="center_vertical"
android:layout_marginBottom="24dp"> android:layout_marginBottom="8dp">
<LinearLayout <LinearLayout
android:layout_width="0dp" android:layout_width="0dp"
@ -241,6 +241,47 @@
</LinearLayout> </LinearLayout>
<!-- Screenshot Permission -->
<LinearLayout
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:orientation="horizontal"
android:gravity="center_vertical"
android:layout_marginBottom="24dp">
<LinearLayout
android:layout_width="0dp"
android:layout_height="wrap_content"
android:layout_weight="1"
android:orientation="vertical">
<TextView
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:text="Screen Capture"
android:textSize="14sp"
android:textColor="#1C1917" />
<TextView
android:id="@+id/tvScreenshotStatus"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:text="✗ Not granted"
android:textSize="12sp"
android:textColor="#78716C" />
</LinearLayout>
<Button
android:id="@+id/btnGrantScreenshot"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:text="Grant"
android:backgroundTint="#E5E2DE"
android:textColor="#1C1917" />
</LinearLayout>
<!-- Live Log --> <!-- Live Log -->
<TextView <TextView
android:layout_width="wrap_content" android:layout_width="wrap_content"