{-# LANGUAGE OverloadedStrings #-}

{- | Threat model for detecting Large Data Attack vulnerabilities.

A Large Data Attack exploits permissive @FromData@ parsers in Plutus validators
that ignore extra fields when deserializing @Constr@ data. If a validator's
datum parser only checks the fields it expects and ignores additional ones,
an attacker can "bloat" the datum with extra fields while preserving the
validator's interpretation.

== Consequences ==

1. __Increased execution costs__: Processing bloated datums wastes CPU/memory
   execution units, making transactions more expensive.

2. __Permanent fund locking__: If the datum is bloated sufficiently:

   - Deserializing the datum may exceed execution unit limits
   - The transaction required to spend the UTxO may exceed protocol size limits

   In these cases, the UTxO becomes __permanently unspendable__ and funds
   are locked forever with no possibility of recovery.

== Root Cause ==

'unstableMakeIsData' and 'makeIsDataIndexed' generate parsers that use
wildcard patterns for constructor fields:

@
case (index, args) of
  (0, _) -> MyConstructor  -- The "_" ignores ALL extra fields!
@

This means @Constr 0 []@ and @Constr 0 [junk1, junk2, ..., junk10000]@ both
parse to the same value, allowing attackers to inject arbitrary data.

== Mitigation ==

A secure validator should either:

- Use strict manual @FromData@ instances that check field count exactly
- Validate the datum hash matches an expected value
- Check datum structure explicitly in the validator logic

This threat model tests if a script output with an inline datum still validates
when additional fields are appended to the datum's @Constr@ data structure.
If it does, the validator has a Large Data Attack vulnerability.
-}
module Convex.ThreatModel.LargeData (
  largeDataAttack,
  largeDataAttackWith,
  bloatData,
) where

import Convex.ThreatModel

{- | Check for Large Data Attack vulnerabilities with 1000 extra fields.

This is the default configuration that appends 1000 extra @ScriptDataNumber 42@
fields to any inline datum on a script output. If the transaction still
validates, the script's datum parser is permissive and vulnerable.
-}
largeDataAttack :: ThreatModel ()
largeDataAttack :: ThreatModel ()
largeDataAttack = Int -> ThreatModel ()
largeDataAttackWith Int
1000

{- | Check for Large Data Attack vulnerabilities with a configurable number
of extra fields.

For a transaction with script outputs containing inline datums:

* Try bloating the datum by appending @n@ extra fields
* If the transaction still validates, the script doesn't strictly validate
  its datum structure - it only checks expected fields and ignores extras.

This catches a vulnerability where different parsers may interpret the same
on-chain data differently, leading to potential exploits.
-}
largeDataAttackWith :: Int -> ThreatModel ()
largeDataAttackWith :: Int -> ThreatModel ()
largeDataAttackWith Int
n = [Char] -> ThreatModel () -> ThreatModel ()
forall a. [Char] -> ThreatModel a -> ThreatModel a
Named ([Char]
"Large Data Attack (max " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
n [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" fields)") (ThreatModel () -> ThreatModel ())
-> ThreatModel () -> ThreatModel ()
forall a b. (a -> b) -> a -> b
$ do
  -- Precondition: transaction must spend a script input (otherwise no validator runs)
  Input
_ <- (Input -> Bool) -> ThreatModel Input
anyInputSuchThat (Bool -> Bool
not (Bool -> Bool) -> (Input -> Bool) -> Input -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. AddressAny -> Bool
isKeyAddressAny (AddressAny -> Bool) -> (Input -> AddressAny) -> Input -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Input -> AddressAny
forall t. IsInputOrOutput t => t -> AddressAny
addressOf)

  -- Get all outputs from the transaction
  [Output]
outputs <- ThreatModel [Output]
getTxOutputs

  -- Filter to script outputs with inline datums
  let scriptOutputsWithDatum :: [Output]
scriptOutputsWithDatum = (Output -> Bool) -> [Output] -> [Output]
forall a. (a -> Bool) -> [a] -> [a]
filter Output -> Bool
isScriptOutputWithInlineDatum [Output]
outputs

  -- Precondition: there must be at least one script output with inline datum
  ThreatModel () -> ThreatModel ()
forall a. ThreatModel a -> ThreatModel a
threatPrecondition (ThreatModel () -> ThreatModel ())
-> ThreatModel () -> ThreatModel ()
forall a b. (a -> b) -> a -> b
$ Bool -> ThreatModel ()
ensure (Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ [Output] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Output]
scriptOutputsWithDatum)

  -- Pick a target output
  Output
target <- [Output] -> ThreatModel Output
forall a. Show a => [a] -> ThreatModel a
pickAny [Output]
scriptOutputsWithDatum

  -- Extract the inline datum (we know it exists due to the filter)
  ScriptData
originalDatum <- case Output -> Maybe ScriptData
getInlineDatum Output
target of
    Maybe ScriptData
Nothing -> [Char] -> ThreatModel ScriptData
forall a. [Char] -> ThreatModel a
failPrecondition [Char]
"Script output missing inline datum"
    Just ScriptData
originalDatum' -> ScriptData -> ThreatModel ScriptData
forall a. a -> ThreatModel a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ScriptData
originalDatum'

  let bloatedDatum :: ScriptData
bloatedDatum = Int -> ScriptData -> ScriptData
bloatData Int
n ScriptData
originalDatum

  [Char] -> ThreatModel ()
counterexampleTM ([Char] -> ThreatModel ()) -> [Char] -> ThreatModel ()
forall a b. (a -> b) -> a -> b
$
    [[Char]] -> [Char]
paragraph
      [ [Char]
"The transaction contains a script output at index"
      , TxIx -> [Char]
forall a. Show a => a -> [Char]
show (Output -> TxIx
outputIx Output
target)
      , [Char]
"with an inline datum."
      ]

  [Char] -> ThreatModel ()
counterexampleTM ([Char] -> ThreatModel ()) -> [Char] -> ThreatModel ()
forall a b. (a -> b) -> a -> b
$
    [[Char]] -> [Char]
paragraph
      [ [Char]
"Testing if the datum can be bloated with"
      , Int -> [Char]
forall a. Show a => a -> [Char]
show Int
n
      , [Char]
"extra fields while still passing validation."
      ]

  [Char] -> ThreatModel ()
counterexampleTM ([Char] -> ThreatModel ()) -> [Char] -> ThreatModel ()
forall a b. (a -> b) -> a -> b
$
    [[Char]] -> [Char]
paragraph
      [ [Char]
"If this validates, the script's FromData parser is permissive"
      , [Char]
"and ignores extra Constr fields. An attacker could exploit this"
      , [Char]
"to make a single datum satisfy multiple validators,"
      , [Char]
"or to bypass certain datum-based checks."
      ]

  -- Try to validate with the bloated datum
  TxModifier -> ThreatModel ()
shouldNotValidate (TxModifier -> ThreatModel ()) -> TxModifier -> ThreatModel ()
forall a b. (a -> b) -> a -> b
$ Output -> Datum -> TxModifier
forall t. IsInputOrOutput t => t -> Datum -> TxModifier
changeDatumOf Output
target (ScriptData -> Datum
toInlineDatum ScriptData
bloatedDatum)

{- | Bloat a @ScriptData@ value by appending extra fields to a @Constr@.

For @ScriptDataConstructor idx fields@, appends @n@ copies of
@ScriptDataNumber 42@ to the fields list.

For other @ScriptData@ variants (Map, List, Number, Bytes), returns
the value unchanged since they don't have the Constr structure that
typical FromData instances parse.
-}
bloatData :: Int -> ScriptData -> ScriptData
bloatData :: Int -> ScriptData -> ScriptData
bloatData Int
n ScriptData
sd = case ScriptData
sd of
  ScriptDataConstructor Integer
idx [ScriptData]
fields ->
    let extraFields :: [ScriptData]
extraFields = Int -> ScriptData -> [ScriptData]
forall a. Int -> a -> [a]
replicate Int
n (Integer -> ScriptData
ScriptDataNumber Integer
42)
     in Integer -> [ScriptData] -> ScriptData
ScriptDataConstructor Integer
idx ([ScriptData]
fields [ScriptData] -> [ScriptData] -> [ScriptData]
forall a. [a] -> [a] -> [a]
++ [ScriptData]
extraFields)
  -- Other cases: return unchanged
  ScriptData
_ -> ScriptData
sd

-- | Check if an output is a script output with an inline datum.
isScriptOutputWithInlineDatum :: Output -> Bool
isScriptOutputWithInlineDatum :: Output -> Bool
isScriptOutputWithInlineDatum Output
output =
  Bool -> Bool
not (AddressAny -> Bool
isKeyAddressAny (Output -> AddressAny
forall t. IsInputOrOutput t => t -> AddressAny
addressOf Output
output)) Bool -> Bool -> Bool
&& Output -> Bool
hasInlineDatum Output
output

-- | Check if an output has an inline datum.
hasInlineDatum :: Output -> Bool
hasInlineDatum :: Output -> Bool
hasInlineDatum Output
output =
  case TxOut CtxTx Era -> Datum
forall ctx. TxOut ctx Era -> TxOutDatum ctx Era
datumOfTxOut (Output -> TxOut CtxTx Era
outputTxOut Output
output) of
    TxOutDatumInline{} -> Bool
True
    Datum
_ -> Bool
False

-- | Extract the inline datum from an output if present.
getInlineDatum :: Output -> Maybe ScriptData
getInlineDatum :: Output -> Maybe ScriptData
getInlineDatum Output
output =
  case TxOut CtxTx Era -> Datum
forall ctx. TxOut ctx Era -> TxOutDatum ctx Era
datumOfTxOut (Output -> TxOut CtxTx Era
outputTxOut Output
output) of
    TxOutDatumInline BabbageEraOnwards Era
_ HashableScriptData
hashableData -> ScriptData -> Maybe ScriptData
forall a. a -> Maybe a
Just (HashableScriptData -> ScriptData
getScriptData HashableScriptData
hashableData)
    Datum
_ -> Maybe ScriptData
forall a. Maybe a
Nothing

-- | Convert a @ScriptData@ to an inline @Datum@ (TxOutDatum CtxTx Era).
toInlineDatum :: ScriptData -> Datum
toInlineDatum :: ScriptData -> Datum
toInlineDatum ScriptData
sd =
  BabbageEraOnwards Era -> HashableScriptData -> Datum
forall era ctx.
BabbageEraOnwards era -> HashableScriptData -> TxOutDatum ctx era
TxOutDatumInline BabbageEraOnwards Era
BabbageEraOnwardsConway (ScriptData -> HashableScriptData
unsafeHashableScriptData ScriptData
sd)