xquery version "3.1";
(:~
 : Constructors and accessors for random distributions.
 : Parameters are usually numeric but can be functions to create dynamically
 : mutating distributions. See core/random.xqy for details.
 :
 : Copyright© Mary Holstege 2020-2024
 : CC-BY (https://creativecommons.org/licenses/by/4.0/)
 : @since December 2020
 : @custom:Status Stable
 :)
module namespace this="http://mathling.com/type/distribution";

import module namespace util="http://mathling.com/core/utilities"
  at "../core/utilities.xqy";

declare namespace art="http://mathling.com/art";
declare namespace map="http://www.w3.org/2005/xpath-functions/map";
declare namespace math="http://www.w3.org/2005/xpath-functions/math";

(:======================================================================
 : Basic distribution constructors
 :======================================================================:)

(:~
 : constant()
 : Build a constant distribution. The value is recorded under the "min" key
 : and the descriptor is passed through this:set-is-complex().
 :
 : @param $value: the value of the constant distribution
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:constant(
  $value as item()
) as map(xs:string,item()*)
{
  this:set-is-complex(
    map {
      "distribution" : "constant",
      "min" : $value
    }
  )
};

(:~
 : constant-index-of()
 : Build a constant distribution that functions as an index for the value:
 : the constant is the integer 1 and the value is recorded under "keys".
 :
 : @param $value: the constant value
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:constant-index-of(
  $value as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(
    map {
      "distribution" : "constant",
      "min" : 1,
      "cast": "integer",
      "keys": $value
    }
  )
};

(:~
 : uniform()
 : Build a uniform distribution over the given value range.
 :
 : @param $min: minimum value of the distribution
 : @param $max: maximum value of the distribution
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:uniform(
  $min as item(),
  $max as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(
    map {
      "distribution" : "uniform",
      "min" : $min,
      "max" : $max
    }
  )
};

(:~
 : uniform-index()
 : Construct a uniform distribution that
functions as an index over
 : integers starting
 :
 : @param $start: starting index
 : @param $count: how many index values
 :)
declare %art:distribution
function this:uniform-index(
  $start as xs:integer,
  $count as xs:integer
) as map(xs:string, item()*)
{
  (: The stored "max" is $start + $count :)
  let $limit := $start + $count
  return
    this:set-is-complex(
      map {
        "distribution" : "uniform",
        "min" : $start,
        "max" : $limit,
        "cast": "integer"
      }
    )
};

(:~
 : uniform-index-of()
 : Build a uniform distribution that functions as an index over the values.
 : A single value yields a constant index distribution instead.
 :
 : @param $values: the values
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:uniform-index-of(
  $values as item()*
) as map(xs:string, item()*)
{
  let $n := count($values)
  return
    if ($n = 1)
    then
      (: exactly one value: same descriptor as a constant index :)
      this:constant-index-of($values)
    else
      this:set-is-complex(
        map {
          "distribution" : "uniform",
          "min" : 1,
          "max" : $n,
          "cast": "integer",
          "keys": $values
        }
      )
};

(:~
 : normal()
 : Build a normal distribution.
 :
 : @param $mean: mean value of the distribution
 : @param $std: standard deviation of the distribution
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:normal(
  $mean as item(),
  $std as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(
    map {
      "distribution" : "normal",
      "mean" : $mean,
      "std" : $std
    }
  )
};

(:~
 : normal-index-of()
 : Construct a normal distribution that acts as an index over the value
 : Indexes out of range will be resampled.
:
 : @param $std: standard deviation of the distribution (mean is center of range)
 : @param $values: the values
 :)
declare %art:distribution
function this:normal-index-of(
  $std as item(),
  $values as item()*
) as map(xs:string, item()*)
{
  let $n := count($values)
  (: NOTE(review): the mean is stored as count div 2, whereas the midpoint of
     the index range 1..count is (count + 1) div 2. Confirm against the
     integer cast performed by the consumer which one is intended. :)
  let $center := $n div 2
  return
    this:set-is-complex(
      map {
        "distribution" : "normal",
        "mean" : $center,
        "std" : $std,
        "cast": "integer",
        "min": 1,
        "max": $n,
        "truncation": "resample",
        "keys": $values
      }
    )
};

(:~
 : normal()
 : Build a normal distribution with default mean (0) and std (1).
 :
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:normal() as map(xs:string, item()*)
{
  let $default-mean := 0
  let $default-std := 1
  return this:normal($default-mean, $default-std)
};

(:~
 : skewed()
 : Build a skewed normal distribution.
 :
 : @param $mean: mean value of the distribution
 : @param $std: standard deviation of the distribution
 : @param $skew: skew of the distribution
 : @return the distribution descriptor map
 :
 : Skewed distribution is defined by parameters α, ξ, and ω
 :   α is skew
 :     α = 0 => normal, α > 0 right skewed, α < 0 left skewed
 :     right skewed => long tail on right
 :   ξ = location = shift along x
 :   ω = scaling along y
 :     ω is positive
 :
 : Mean is mean value; if have long tail on right, means will have more
 : instances below the mean, but larger variance to higher values
 :
 : mean = ξ + ω*δ*sqrt(2/π)  δ=α/sqrt(1+α*α)
 : std = ω*ω*(1 - 2*δ*δ/π)
 :
 : So:
 : ω = sqrt(std / (1 - 2*δ*δ/π))
 : ξ = mean - ω*δ*sqrt(2/π)
 :)
declare %art:distribution
function this:skewed(
  $mean as item(),
  $std as item(),
  $skew as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(
    map {
      "distribution" : "skewed",
      "mean" : $mean,
      "std" : $std,
      "skew" : $skew
    }
  )
};

(:~
 : skewed-index-of()
 : Construct a skewed normal distribution that acts as an index over the given
 : values. Indexes out of range will be resampled.
:
 : @param $mean: mean value of the distribution
 : @param $std: standard deviation of the distribution
 : @param $skew: skew of the distribution
 : @param $values: the values
 :)
declare %art:distribution
function this:skewed-index-of(
  $mean as item(),
  $std as item(),
  $skew as item(),
  $values as item()*
) as map(xs:string, item()*)
{
  let $n := count($values)
  return
    this:set-is-complex(
      map {
        "distribution" : "skewed",
        "mean" : $mean,
        "std" : $std,
        "skew" : $skew,
        "cast": "integer",
        "min": 1,
        "max": $n,
        "truncation": "resample",
        "keys": $values
      }
    )
};

(:~
 : skewed()
 : Build a skewed normal distribution with the defaults mean 0,
 : standard deviation 1, and skew 0.
 :
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:skewed() as map(xs:string, item()*)
{
  let $default-mean := 0
  let $default-std := 1
  let $default-skew := 0
  return this:skewed($default-mean, $default-std, $default-skew)
};

(:~
 : bernoulli()
 : Build a Bernoulli distribution.
 :
 : @param $p: Probability of returning 1 as percent [0,100]
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:bernoulli(
  $p as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(
    map {
      "distribution" : "bernoulli",
      "p" : $p
    }
  )
};

(:~
 : bernoulli-index-of()
 : Construct a Bernoulli distribution that acts as an index over the (pair of)
 : values: 0 => 1st; 1 => 2nd.
 : That is, dist:bernoulli-index-of(20, ("a","b")) returns "b" 20% of the time
 : and "a" 80% of the time.
:
 : @param $p: Probability of returning 1 as percent [0,100]
 : @param $values: the pair of values
 :)
declare %art:distribution
function this:bernoulli-index-of(
  $p as item(),
  $values as item()*
) as map(xs:string, item()*)
{
  (: "post-shift" of 1 is recorded so the 0/1 outcome lines up with 1-based keys :)
  this:set-is-complex(
    map {
      "distribution" : "bernoulli",
      "p" : $p,
      "cast": "integer",
      "keys": $values,
      "post-shift": 1
    }
  )
};

(:~
 : bernoulli()
 : Build a Bernoulli distribution with the default probability (50).
 :
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:bernoulli() as map(xs:string, item()*)
{
  let $default-p := 50
  return this:bernoulli($default-p)
};

(:~
 : flip()
 : Build a Bernoulli distribution rendered as boolean values.
 :
 : @param $p: Probability of returning true()
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:flip(
  $p as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(
    map {
      "distribution" : "flip",
      "p" : $p
    }
  )
};

(:~
 : flip()
 : Build a Bernoulli distribution rendered as boolean values with the
 : default probability (50).
 :
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:flip() as map(xs:string, item()*)
{
  let $default-p := 50
  return this:flip($default-p)
};

(:~
 : zipf-sums()
 : Returns a sequence of cumulative Zipf probabilities, to be used
 : by select-index() to perform selections. This is to ensure we don't have to
 : keep recomputing the cumulative sums for each selection.
:
 : @param $alpha: the alpha parameter, should be >=1
 :   That said, numbers < 1 work:
 :   0 = α => uniform
 :   0 < α < 1 => get more representation of higher ranks
 :   α < 0 => invert the range, so that higher ranks are more likely than lower ones
 :   (Rounded) cumulative percents for various α values:
 :                   1     2     3     4     5     6     7     8     9    10
 :   z(-3,10)=     0.0   0.3   1.2   3.3   7.4  14.6  25.9  42.8  66.9 100.0
 :   z(-2,10)=     0.3   1.3   3.6   7.8  14.3  23.6  36.4  53.0  74.0 100.0
 :   z(-1.1,10)=   1.5   4.7   9.8  16.7  25.6  36.4  49.2  64.1  81.0 100.0
 :   z(-1,10)=     1.8   5.5  10.9  18.2  27.3  38.2  50.9  65.5  81.8 100.0
 :   z(-0.9,10)=   2.2   6.3  12.1  19.8  29.1  40.0  52.6  66.8  82.6 100.0
 :   z(-0.5,10)=   4.5  10.7  18.5  27.4  37.3  48.2  60.0  72.6  85.9 100.0
 :   z(-0.3,10)=   6.2  13.9  22.6  32.0  42.1  52.7  63.9  75.5  87.6 100.0
 :   z(0,10)=     10.0  20.0  30.0  40.0  50.0  60.0  70.0  80.0  90.0 100.0
 :   z(0.3,10)=   15.4  27.9  38.9  49.1  58.5  67.5  76.1  84.3  92.3 100.0
 :   z(0.5,10)=   19.9  34.0  45.5  55.5  64.4  72.5  80.0  87.1  93.7 100.0
 :   z(0.9,10)=   31.0  47.7  59.2  68.1  75.4  81.6  87.0  91.8  96.1 100.0
 :   z(1,10)=     34.1  51.2  62.6  71.1  78.0  83.6  88.5  92.8  96.6 100.0
 :   z(1.1,10)=   37.3  54.7  65.9  74.0  80.3  85.5  89.9  93.7  97.0 100.0
 :   z(2,10)=     64.5  80.7  87.8  91.9  94.4  96.2  97.5  98.6  99.4 100.0
 :   z(3,10)=     83.5  93.9  97.0  98.3  99.0  99.4  99.6  99.8  99.9 100.0
 : @param $n: number to generate
 :)
declare function this:zipf-sums($alpha as xs:double, $n as xs:integer) as xs:double*
{
  let $ranks := 1 to $n
  (: First pass: total of the unnormalized weights 1/rank^α :)
  let $total :=
    fold-left(
      $ranks, 0,
      function($acc as xs:double, $rank as xs:integer) as xs:double {
        $acc + 1 div (math:pow(xs:double($rank), $alpha))
      }
    )
  let $scale := 1 div $total
  (: Second pass: running sums of the normalized weights; the seed 0 is kept
     at the head while folding so that [last()] always has a value :)
  let $running :=
    fold-left(
      $ranks, 0,
      function($acc as xs:double*, $rank as xs:integer) as xs:double* {
        ($acc, $acc[last()] + $scale div math:pow(xs:double($rank), $alpha))
      }
    )
  (: drop the seed :)
  return tail($running)
};

(:~
 : zipf()
 : Construct a Zipf distribution as an integer index from 1 to N
 :
 : p(n)=(1/k^α)/sum(i=1 to n)(1/i^α)
 : n = number of elements
 : k = rank
 : α = exponent
 :
 : @param $alpha: the α parameter, should be >=1
 :   For English words this by some estimates is around 1.6
 :   For city sizes 1.07
 :
@param $n: number of Zipf values in range
 :)
declare %art:distribution
function this:zipf(
  $alpha as xs:double,
  $n as xs:integer
) as map(xs:string, item()*)
{
  (: The cumulative table is built for $n + 1 ranks while "max" stays at $n :)
  let $cumulative as xs:double* := this:zipf-sums($alpha, $n + 1)
  return
    this:set-is-complex(
      map {
        "distribution" : "zipf",
        "cast" : "integer",
        "alpha" : $alpha,
        "sums" : $cumulative,
        "max" : $n,
        "truncation" : "resample"
      }
    )
};

(:~
 : zipf()
 : Build a Zipf distribution as an integer index from 1 to N with the
 : defaults alpha 1 and limit 1000.
 :
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:zipf() as map(xs:string, item()*)
{
  let $default-alpha := 1
  let $default-limit := 1000
  return this:zipf($default-alpha, $default-limit)
};

(:~
 : zipf-index-of()
 : Build a Zipf index over the set of values, taking them in the order
 : given.
 :
 : @param $alpha: the α parameter
 : @param $values: the values
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:zipf-index-of(
  $alpha as xs:double,
  $values as item()*
) as map(xs:string,item()*)
{
  let $size := count($values)
  (: As in zipf(): one more rank than "max" goes into the cumulative table :)
  let $cumulative as xs:double* := this:zipf-sums($alpha, $size + 1)
  return
    this:set-is-complex(
      map {
        "distribution" : "zipf",
        "cast" : "integer",
        "alpha" : $alpha,
        "sums" : $cumulative,
        "max" : $size,
        "truncation" : "resample",
        "keys": $values
      }
    )
};

(:~
 : markov-sums()
 : Return a Markov probability matrix as a matrix where each row contains
 : the cumulative probability sums for that row. This allows the Markov
 : distribution selection to run more efficiently.
 :
 : Example:
 : markov-sums(3, (
 :   0.2, 0.4, 0.4,
 :   0.5, 0.5, 0,
 :   0.4, 0.2, 0.4
 : )) => (
 :   0.2, 0.6, 1,
 :   0.5, 1, 1,
 :   0.4, 0.6, 1
 : )
 :
 : To account for rounding, the last item in the row is always forced to 1
 : and any sums greater than 1 are also rounded down to 1.
:
 : @param $dim: size of each dimension of matrix, math:sqrt(count($matrix))
 : @param $matrix: the input non-cumulative Markov matrix
 :)
declare function this:markov-sums($dim as xs:integer, $matrix as xs:double*) as xs:double*
{
  for $row-number in 1 to $dim
  (: cells of this row: $dim items starting right after the previous row :)
  let $cells := subsequence($matrix, ($row-number - 1) * $dim + 1, $dim)
  (: running sums clamped at 1; the seed 0 is dropped by tail() :)
  let $cumulative :=
    tail(
      fold-left(
        $cells, 0,
        function($acc as xs:double*, $cell as xs:double) as xs:double* {
          ($acc, min(($acc[last()] + $cell, 1)))
        }
      )
    )
  return
    if ($cumulative[last()] lt 1)
    then ($cumulative[position() lt last()], 1) (: force the final entry to 1 :)
    else $cumulative
};

(:~
 : markov-percent-sums()
 : Return a Markov probability matrix as a matrix where each row contains
 : the cumulative probability sums for that row. This allows the Markov
 : distribution selection to run more efficiently.
 :
 : Example:
 : markov-percent-sums(3, (
 :   20, 40, 40,
 :   50, 50, 0,
 :   40, 20, 40
 : )) => (
 :   0.2, 0.6, 1,
 :   0.5, 1, 1,
 :   0.4, 0.6, 1
 : )
 :
 : To account for rounding, the last item in the row is always forced to 1
 : and any sums greater than 1 are also rounded down to 1.
 :
 : @param $dim: size of each dimension of matrix, math:sqrt(count($matrix))
 : @param $matrix: the input non-cumulative Markov matrix
 :)
declare function this:markov-percent-sums($dim as xs:integer, $matrix as xs:integer*) as xs:double*
{
  for $row-number in 1 to $dim
  let $cells := subsequence($matrix, ($row-number - 1) * $dim + 1, $dim)
  (: each percentage is scaled to [0,1] before it joins the running sum :)
  let $cumulative :=
    tail(
      fold-left(
        $cells, 0,
        function($acc as xs:double*, $cell as xs:double) as xs:double* {
          ($acc, min(($acc[last()] + ($cell div 100), 1)))
        }
      )
    )
  return
    if ($cumulative[last()] lt 1)
    then ($cumulative[position() lt last()], 1)
    else $cumulative
};

(:~
 : markov()
 : Construct a Markov distribution given a square matrix of probabilities
 : expressed as percentages (0 to 100).
:
 : Example:
 : (0, 10, 90,
 :  10, 0, 90,
 :  33, 33, 33)
 : Says state 1 transitions to state 2 10% of the time and state 3 90% of the
 : time; state 2 transitions to state 1 10% of the time and state 3 90% of the
 : time; state 3 transitions to each state 1/3 of the time (actually, due to
 : rounding, state 3 transitions to state 3 34% of the time)
 :
 : @param $start: Starting state (an index in the range 1 to sqrt(count($percent-matrix)))
 : @param $percent-matrix: Square matrix of percentages
 :)
declare %art:distribution
function this:markov(
  $start as xs:integer,
  $percent-matrix as xs:integer*
) as map(xs:string, item()*)
{
  (: side length of the matrix, derived from its item count :)
  let $side := xs:integer(math:sqrt(count($percent-matrix)))
  let $cumulative := this:markov-percent-sums($side, $percent-matrix)
  return
    this:set-is-complex(
      map {
        "distribution" : "markov",
        "dim" : $side,
        "sums" : $cumulative,
        "start" : $start
      }
    )
};

(:~
 : markov()
 : Build a Markov distribution given a square matrix of probabilities
 : expressed as percentages (0 to 100), with 1 as the starting state.
 :
 : @param $percent-matrix: Square matrix of percentages
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:markov(
  $percent-matrix as xs:integer*
) as map(xs:string, item()*)
{
  let $first-state := 1
  return this:markov($first-state, $percent-matrix)
};

(:~
 : markov-index-of()
 : Build a Markov distribution as an index over a set of values.
 : The starting state is 1.
 :
 : @param $percent-matrix: Square matrix of percentages
 : @param $values: key values
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:markov-index-of(
  $percent-matrix as xs:integer*,
  $values as item()*
) as map(xs:string, item()*)
{
  let $side := xs:integer(math:sqrt(count($percent-matrix)))
  let $cumulative := this:markov-percent-sums($side, $percent-matrix)
  return
    this:set-is-complex(
      map {
        "distribution" : "markov",
        "dim" : $side,
        "sums" : $cumulative,
        "start" : 1,
        "keys": $values,
        "max": count($values),
        "cast": "integer"
      }
    )
};

(:~
 : percent-sums()
 : Returns a sequence of cumulative probabilities, to be used
 : by select-index() to perform selections. This is to ensure we don't have to
 : keep recomputing the cumulative sums for each selection.
:
 : To account for rounding, the last item is always forced to 1 and any sums
 : greater than 1 are also rounded down to 1. To get expected distributions,
 : ensure that the input percentages sum to 100.
 :
 : Sums are returned in map:keys() order, so this assumes stability of that
 : function between calls to percent-sums() and calls to selection().
 :
 : @param $weights a map from key to percent (expressed as number 0 to 100)
 :)
declare function this:percent-sums($weights as map(xs:anyAtomicType,xs:numeric)) as xs:double*
{
  (: Scale each percentage to [0,1] in map:keys() order, then accumulate
     with the same clamped running sum as simple-sums() :)
  this:simple-sums(
    for $key in map:keys($weights)
    return $weights($key) div 100
  )
};

(:~
 : simple-sums()
 : Build a cumulative weight table from an ordered set of weights in [0,1].
 : Running sums are clamped at 1 and the final entry is forced to 1.
 :
 : @param $weights: the ordered weights
 : @return the cumulative sums, one per weight
 :)
declare function this:simple-sums(
  $weights as xs:double*
) as xs:double*
{
  (: fold with a seed of 0 so [last()] always has a value; tail() drops it :)
  let $running :=
    tail(
      fold-left(
        $weights, 0,
        function($acc as xs:double*, $weight as xs:double) as xs:double* {
          ($acc, min(($acc[last()] + $weight, 1)))
        }
      )
    )
  return
    if ($running[last()] lt 1)
    then ($running[position() lt last()], 1)
    else $running
};

(:~
 : sums()
 : Construct an ad hoc index distribution based on a weight map.
 : Assumes that keys are returned in a consistent order from the table
 :
 : Example:
 : {"a": 10, "b": 30, "c": 60}
 : If keys are returned in that order, will return 1 10% of the time, 2
 : 30% of the time, and 3 60% of the time.
:
 : @param $weight-table: map of keys to percentages
 :)
declare %art:distribution
function this:sums(
  $weight-table as map(xs:anyAtomicType, xs:numeric)
) as map(xs:string, item()*)
{
  this:set-is-complex(
    map {
      "distribution" : "sums",
      "cast" : "integer",
      "sums" : this:percent-sums($weight-table),
      "keys": map:keys($weight-table)
    }
  )
};

(:~
 : weighted-index-of()
 : Build a sums() distribution based on a set of *relative* weights:
 : each weight is rescaled by 1 / sum of all weights.
 : @param $weights: sequence of weights
 : @param $values: sequence of values corresponding to the weights
 : @return distribution
 :)
declare %art:distribution
function this:weighted-index-of(
  $weights as xs:double*,
  $values as item()*
) as map(xs:string,item()*)
{
  util:assert(count($weights) = count($values), "Different numbers of weights and values"),
  let $scale := 1.0 div sum($weights)
  let $normalized := for $weight in $weights return $weight * $scale
  return
    this:set-is-complex(
      map {
        "distribution" : "sums",
        "cast" : "integer",
        "sums" : this:simple-sums($normalized),
        "keys": $values
      }
    )
};

(:~
 : weighted-distribution-of()
 : Deprecated alias: forwards to the two-argument weighted-index-of().
 : @param $weights: sequence of weights
 : @param $values: sequence of values corresponding to the weights
 : @return distribution
 :)
declare %art:deprecated
function this:weighted-distribution-of(
  $weights as xs:double*,
  $values as item()*
) as map(xs:string,item()*)
{
  this:weighted-index-of($weights, $values)
};

(:~
 : weighted-index-of()
 : Build a sums() distribution based on a set of *relative* weights:
 : each weight is rescaled by 1 / sum of all weights.
 : @param $weight-table: a map from keys to weights
 : @return distribution
 :)
declare %art:distribution
function this:weighted-index-of(
  $weight-table as map(xs:anyAtomicType, xs:numeric)
) as map(xs:string,item()*)
{
  (: keys and weights are read in the same map:keys() order :)
  let $keys := map:keys($weight-table)
  let $weights := $keys ! $weight-table(.)
  let $scale := 1.0 div sum($weights)
  return
    this:set-is-complex(
      map {
        "distribution" : "sums",
        "cast" : "integer",
        "sums" : this:simple-sums($weights ! (. * $scale)),
        "keys": $keys
      }
    )
};

(:~
 : multimodal()
 : Build a multimodal distribution with a specific selection distribution.
 :
 : @param $distributions: the component distributions
 : @param $selector: selection distribution, must return integers in range [1,|dist|]
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:multimodal(
  $distributions as map(xs:string, item()*)*,
  $selector as map(xs:string,item()*)
) as map(xs:string, item()*)
{
  this:set-is-complex(
    map {
      "distribution" : "multimodal",
      "distributions" : $distributions,
      "selector": $selector
    }
  )
};

(:~
 : multimodal()
 : Build a multimodal distribution (uniform selection): the selector is a
 : uniform distribution from 1 to the number of components, cast to integer.
 :
 : @param $distributions: the component distributions
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:multimodal(
  $distributions as map(xs:string, item()*)*
) as map(xs:string, item()*)
{
  let $uniform-selector :=
    this:cast(this:uniform(1, count($distributions)), "integer")
  return this:multimodal($distributions, $uniform-selector)
};

(:~
 : binomial()
 : Build a binomial distribution.
 :
 : @param $n max value
 : @param $percent probability as percent [0,100]
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:binomial(
  $n as xs:integer,
  $percent as xs:double
) as map(xs:string, item()*)
{
  let $p := $percent div 100
  (: NOTE(review): the terms run over k = 1 to $n, so the k = 0 term
     (1 - p)^n is not in the table, and simple-sums() forces the final entry
     to 1. Confirm against the consumer in core/random.xqy that this is
     intended. :)
  let $terms :=
    for $k in 1 to $n
    return util:binomial($n, $k)*math:pow($p, $k)*math:pow(1 - $p, $n - $k)
  let $cumulative := this:simple-sums($terms)
  return
    this:set-is-complex(
      map {
        "distribution" : "binomial",
        "p" : $percent,
        "max": $n,
        "sums": $cumulative,
        "cast": "integer"
      }
    )
};

(:~
 : binomial-index-of()
 : Construct a binomial distribution that functions as an index over the values.
 : Indexes less than 1 will be resampled. The distribution parameter 'n' is the
 : number of values.
: @param $percent probability as percent [0,100]
 : @param $values the values
 : @return distribution
 :)
declare %art:distribution
function this:binomial-index-of(
  $percent as xs:double,
  $values as item()*
) as map(xs:string,item()*)
{
  let $size := count($values)
  let $p := $percent div 100
  (: one probability term per index k in 1..size, then accumulated :)
  let $terms :=
    for $k in 1 to $size
    return util:binomial($size, $k)*math:pow($p, $k)*math:pow(1 - $p, $size - $k)
  let $cumulative := this:simple-sums($terms)
  return
    this:set-is-complex(
      map {
        "distribution" : "binomial",
        "p" : $percent,
        "max": $size,
        "sums": $cumulative,
        "cast": "integer",
        "min": 1,
        "truncation": "resample",
        "keys": $values
      }
    )
};

(:~
 : binomial-poisson()
 : Build a Poisson binomial distribution.
 :
 : @param $probabilities: probabilities of the component Bernoulli distributions
 :   as percentages
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:binomial-poisson(
  $probabilities as item()*
) as map(xs:string, item()*)
{
  this:set-is-complex(
    map {
      "distribution" : "binomial-poisson",
      "probabilities" : $probabilities
    }
  )
};

(:~
 : binomial-poisson-index-of()
 : Construct a Poisson binomial distribution that acts as an index over the values.
 : Indexes out of range will be resampled.
:
 : @param $probabilities: probabilities of the component Bernoulli distributions
 :   as percentages
 : @param $values: the values
 :)
declare %art:distribution
function this:binomial-poisson-index-of(
  $probabilities as item()*,
  $values as item()*
) as map(xs:string, item()*)
{
  let $size := count($values)
  return
    this:set-is-complex(
      map {
        "distribution" : "binomial-poisson",
        "probabilities" : $probabilities,
        "min": 1,
        "max": $size,
        "truncation": "resample",
        "cast": "integer",
        "keys": $values
      }
    )
};

(:~
 : poisson()
 : Build a Poisson distribution. λ is stored under the "mean" key.
 :
 : @param $λ: the λ parameter of the distribution
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:poisson(
  $λ as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(
    map {
      "distribution" : "poisson",
      "mean": $λ
    }
  )
};

(:~
 : poisson-index-of()
 : Build a Poisson distribution that acts as an index over the values.
 : Indexes out of range will be resampled.
 :
 : @param $λ: the λ parameter of the distribution
 : @param $values: the values
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:poisson-index-of(
  $λ as item(),
  $values as item()*
) as map(xs:string, item()*)
{
  let $size := count($values)
  return
    this:set-is-complex(
      map {
        "distribution" : "poisson",
        "mean": $λ,
        "min": 1,
        "max": $size,
        "truncation": "resample",
        "cast": "integer",
        "keys": $values
      }
    )
};

(:~
 : exponential()
 : Build an exponential distribution. λ is stored under the "lambda" key.
 : @param $λ: the λ parameter of the distribution
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:exponential(
  $λ as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(
    map {
      "distribution" : "exponential",
      "lambda": $λ
    }
  )
};

(:~
 : exponential-index-of()
 : Construct an exponential distribution that acts as an index over the values.
: Indexes out of range will be resampled
 : @param $λ: the λ parameter of the distribution
 : @param $values: the values
 :)
declare %art:distribution
function this:exponential-index-of(
  $λ as item(),
  $values as item()*
) as map(xs:string, item()*)
{
  let $size := count($values)
  return
    this:set-is-complex(
      map {
        "distribution" : "exponential",
        "lambda": $λ,
        "min": 1,
        "max": $size,
        "truncation": "resample",
        "cast": "integer",
        "keys": $values
      }
    )
};

(:~
 : gamma()
 : Build a gamma distribution.
 : @param $k: the k (shape) parameter of the distribution
 : @param $θ: the θ (scaling) parameter of the distribution
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:gamma(
  $k as item(),
  $θ as item()
) as map(xs:string,item()*)
{
  this:set-is-complex(
    map {
      "distribution": "gamma",
      "k": $k,
      "theta": $θ
    }
  )
};

(:~
 : gamma-index-of()
 : Build a gamma distribution that acts as an index over the values.
 : Indexes out of range will be resampled.
 : @param $k: the k (shape) parameter of the distribution
 : @param $θ: the θ (scaling) parameter of the distribution
 : @param $values: the values
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:gamma-index-of(
  $k as item(),
  $θ as item(),
  $values as item()*
) as map(xs:string,item()*)
{
  let $size := count($values)
  return
    this:set-is-complex(
      map {
        "distribution": "gamma",
        "k": $k,
        "theta": $θ,
        "min": 1,
        "max": $size,
        "truncation": "resample",
        "cast": "integer",
        "keys": $values
      }
    )
};

(:~
 : beta()
 : Construct a beta distribution.
: The parameters influence the shape of the curve so:
 : α=β => symmetric
 :   α=β < 1 => U curve
 :   α=β=1 => uniform[α,β]
 :   α=β > 1 => inverse U
 : α!=β => skewed, positive skew α < β, negative for α > β
 :   α, β < 1 => U curve
 :   α, β > 1 => inverse U
 :   α < 1, β >= 1 => flipped J right tail
 :   α >= 1, β < 1 => J left tail
 :   α = 1, β > 1 => monotone decrease
 :     1 < β < 2 => concave
 :     β = 2 => line
 :     β > 2 => flipped J right tail
 :   α > 1, β = 1 => monotone increase
 :     2 > α > 1 => concave
 :     α = 2 => straight
 :     α > 2 => J left tail
 :
 : @param $α: the α parameter of the distribution (>0)
 : @param $β: the β parameter of the distribution (>0)
 :)
declare %art:distribution
function this:beta(
  $α as item(),
  $β as item()
) as map(xs:string,item()*)
{
  this:set-is-complex(
    map {
      "distribution": "beta",
      "alpha": $α,
      "beta": $β
    }
  )
};

(:~
 : beta-index-of()
 : Build a beta distribution that acts as an index over the values.
 : Indexes out of range will be resampled.
 :
 : @param $α: the α parameter of the distribution (>0)
 : @param $β: the β parameter of the distribution (>0)
 : @param $values: the values
 : @return the distribution descriptor map
 :)
declare %art:distribution
function this:beta-index-of(
  $α as item(),
  $β as item(),
  $values as item()*
) as map(xs:string,item()*)
{
  let $size := count($values)
  return
    this:set-is-complex(
      map {
        "distribution": "beta",
        "alpha": $α,
        "beta": $β,
        "min": 1,
        "max": $size,
        "truncation": "resample",
        "cast": "integer",
        "keys": $values
      }
    )
};

(:~
 : binomial-beta()
 : Beta binomial distribution.
: μ = nα/(α+β)
 : σ² = nαβ(α+β+n)/((α+β)²(α+β+1))
 :
 : @param $n n distribution parameter, the maximum
 : @param $α: the α parameter of the distribution (>0)
 : @param $β: the β parameter of the distribution (>0)
 :)
declare %art:distribution
function this:binomial-beta(
  $n as item(),
  $α as item(),
  $β as item()
) as map(xs:string,item()*)
{
  map {
    "distribution": "binomial-beta",
    "max": $n,
    "alpha": $α,
    "beta": $β,
    "cast": "integer"
  }=>this:set-is-complex()
};

(:~
 : binomial-beta-index-of()
 : Beta binomial distribution that acts as index of the given values.
 : Indexes out of range resampled. The distribution parameter 'n' is the number
 : of values.
 :
 : @param $α: the α parameter of the distribution (>0)
 : @param $β: the β parameter of the distribution (>0)
 : @param $values the values
 :)
declare %art:distribution
function this:binomial-beta-index-of(
  $α as item(),
  $β as item(),
  $values as item()*
) as map(xs:string,item()*)
{
  map {
    "distribution": "binomial-beta",
    "max": count($values),
    "alpha": $α,
    "beta": $β,
    "cast": "integer",
    "min": 1,
    "truncation": "resample",
    "keys": $values
  }=>this:set-is-complex()
};

(:======================================================================
 : Accessors
 :
 : Each accessor reads one key of the distribution descriptor map. Unless
 : stated otherwise the declared result is exactly one item, so calling an
 : accessor for a key the descriptor does not carry raises a type error.
 :======================================================================:)

(:~
 : distribution()
 : @param $distribution: the distribution descriptor
 : @return the value of the "distribution" key (the distribution kind)
 :)
declare function this:distribution(
  $distribution as map(xs:string, item()*)
) as xs:string
{
  $distribution("distribution")
};

(:~
 : cast()
 : Read the "cast" key. The key is optional: many constructors never set it
 : and the "double" cast modifier removes it, so the result may be empty.
 :
 : @param $distribution: the distribution descriptor
 : @return the cast name, or the empty sequence when no cast is set
 :)
declare function this:cast(
  $distribution as map(xs:string, item()*)
) as xs:string?
{
  $distribution("cast")
};

(:~
 : min()
 : @param $distribution: the distribution descriptor
 : @return the value of the "min" key
 :)
declare function this:min(
  $distribution as map(xs:string, item()*)
) as item()
{
  $distribution("min")
};

(:~
 : max()
 : @param $distribution: the distribution descriptor
 : @return the value of the "max" key
 :)
declare function this:max(
  $distribution as map(xs:string, item()*)
) as item()
{
  $distribution("max")
};

(:~
 : pre-multiplier()
 : @param $distribution: the distribution descriptor
 : @return the value of the "pre-multiplier" key
 :)
declare function this:pre-multiplier(
  $distribution as map(xs:string, item()*)
) as item()
{
  $distribution("pre-multiplier")
};

(:~
 : post-multiplier()
 : @param $distribution: the distribution descriptor
 : @return the value of the "post-multiplier" key
 :)
declare function this:post-multiplier(
  $distribution as map(xs:string, item()*)
) as item()
{
  $distribution("post-multiplier")
};

(:~
 : post-shift()
 : @param $distribution: the distribution descriptor
 : @return the value of the "post-shift" key
 :)
declare function this:post-shift(
  $distribution as map(xs:string, item()*)
) as item()
{
  $distribution("post-shift")
};

(:~
 : truncation()
 : @param $distribution: the distribution descriptor
 : @return the value of the "truncation" key
 :)
declare function this:truncation(
  $distribution as map(xs:string, item()*)
) as item()
{
  $distribution("truncation")
};

(:~
 : mean()
 : @param $distribution: the distribution descriptor
 : @return the value of the "mean" key
 :)
declare function this:mean(
  $distribution as map(xs:string, item()*)
) as item()
{
  $distribution("mean")
};

(:~
 : std()
 : @param $distribution: the distribution descriptor
 : @return the value of the "std" key
 :)
declare function this:std(
  $distribution as map(xs:string, item()*)
) as item()
{
  $distribution("std")
};

(:~
 : skew()
 : @param $distribution: the distribution descriptor
 : @return the value of the "skew" key
 :)
declare function this:skew(
  $distribution as map(xs:string, item()*)
) as item()
{
  $distribution("skew")
};

(:~
 : p()
 : @param $distribution: the distribution descriptor
 : @return the value of the "p" key
 :)
declare function this:p(
  $distribution as map(xs:string, item()*)
) as item()
{
  $distribution("p")
};

(:~
 : probabilities()
 : Read the "probabilities" key. binomial-poisson() stores a whole sequence
 : under this key, so the result is a sequence rather than a single item.
 :
 : @param $distribution: the distribution descriptor
 : @return the stored probabilities (possibly several, possibly none)
 :)
declare function this:probabilities(
  $distribution as map(xs:string, item()*)
) as item()*
{
  $distribution("probabilities")
};

(:~
 : start()
 : @param $distribution: the distribution descriptor
 : @return the value of the "start" key
 :)
declare function this:start(
  $distribution as map(xs:string, item()*)
) as item()
{
  $distribution("start")
};

(:~
 : keys()
 : @param $distribution: the distribution descriptor
 : @return the values stored under the "keys" key (possibly empty)
 :)
declare function this:keys(
  $distribution as map(xs:string, item()*)
) as item()*
{
  $distribution("keys")
};

(:~
 : distributions()
 : @param $distribution: the distribution descriptor
 : @return the component distributions stored under "distributions"
 :)
declare function this:distributions(
  $distribution as map(xs:string, item()*)
) as map(xs:string,item()*)*
{
  $distribution("distributions")
};

(:~
 : selector()
 : @param $distribution: the distribution descriptor
 : @return the selection distribution stored under "selector"
 :)
declare function this:selector(
  $distribution as map(xs:string, item()*)
) as map(xs:string,item()*)
{
  $distribution("selector")
};

(:~
 : is-complex()
 : @param $distribution: the distribution descriptor
 : @return the value of the "is_complex" key
 :)
declare function this:is-complex(
  $distribution as map(xs:string, item()*)
) as xs:boolean
{
  $distribution("is_complex")
};

(:======================================================================
 : Modifier functions
 :======================================================================:)

(:~
 : cast()
 : Modify the distribution to cast values. Applies after other modifiers.
 :
 : @param $distribution: the distribution to modify
 : @param $cast: one of "integer", "decimal", "boolean", "string", or "double"
 :   decimal will cast to a double value with $rand:DECIMAL-DIGITS following
 :   the decimal point
 :   Some of the distributions (e.g.
zipf()) automatically add an integer cast;
 :   the "double" cast will remove that
 : @return the modified descriptor: the "cast" entry is removed for "double"
 :   and set to $cast otherwise
 :)
declare function this:cast(
  $distribution as map(xs:string, item()*),
  $cast as xs:string
) as map(xs:string, item()*)
{
  let $recast :=
    if ($cast = "double")
    then map:remove($distribution, "cast")
    else map:put($distribution, "cast", $cast)
  return this:set-is-complex($recast)
};

(: Shorthand for cast($distribution, "double") :)
declare function this:double(
  $distribution as map(xs:string, item()*)
) as map(xs:string, item()*)
{
  this:cast($distribution, "double")
};

(: Shorthand for cast($distribution, "integer") :)
declare function this:integer(
  $distribution as map(xs:string, item()*)
) as map(xs:string, item()*)
{
  this:cast($distribution, "integer")
};

(: Shorthand for cast($distribution, "decimal") :)
declare function this:decimal(
  $distribution as map(xs:string, item()*)
) as map(xs:string, item()*)
{
  this:cast($distribution, "decimal")
};

(: Shorthand for cast($distribution, "boolean") :)
declare function this:boolean(
  $distribution as map(xs:string, item()*)
) as map(xs:string, item()*)
{
  this:cast($distribution, "boolean")
};

(: Shorthand for cast($distribution, "string") :)
declare function this:string(
  $distribution as map(xs:string, item()*)
) as map(xs:string, item()*)
{
  this:cast($distribution, "string")
};

(:~
 : min()
 : Modify the distribution to set a minimum value (after pre-multiplier)
 :
 : @param $distribution: the distribution to modify
 : @param $min: the minimum value
 :   for distributions (like normal) that may generate results less than
 :   this, the truncation setting will determine how to handle the case
 : @return the modified descriptor
 :)
declare function this:min(
  $distribution as map(xs:string, item()*),
  $min as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(map:put($distribution, "min", $min))
};

(:~
 : max()
 : Modify the distribution to set a maximum value (after pre-multiplier)
 :
 : @param $distribution: the distribution to modify
 : @param $max: the maximum value
 :   for distributions (like normal) that may generate results greater than
 :   this, the truncation setting will determine how to handle the case
 : @return the modified descriptor
 :)
declare function this:max(
  $distribution as map(xs:string, item()*),
  $max as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(map:put($distribution, "max", $max))
};

(:~
 : pre-multiplier()
 : Modify the distribution to set a multiplier to apply to the value before
 : applying min/max and other modifications.
 :
 : @param $distribution: the distribution to modify
 : @param $pre-multiplier: the multiplier
 : @return the modified descriptor
 :)
declare function this:pre-multiplier(
  $distribution as map(xs:string, item()*),
  $pre-multiplier as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(map:put($distribution, "pre-multiplier", $pre-multiplier))
};

(:~
 : post-multiplier()
 : Modify the distribution to set a multiplier to apply to the value after
 : applying min/max.
 :
 : @param $distribution: the distribution to modify
 : @param $post-multiplier: the multiplier
 : @return the modified descriptor
 :)
declare function this:post-multiplier(
  $distribution as map(xs:string, item()*),
  $post-multiplier as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(map:put($distribution, "post-multiplier", $post-multiplier))
};

(:~
 : post-shift()
 : Modify the distribution to add a shift to the value after
 : applying min/max.
 :
 : @param $distribution: the distribution to modify
 : @param $post-shift: the shift
 : @return the modified descriptor
 :)
declare function this:post-shift(
  $distribution as map(xs:string, item()*),
  $post-shift as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(map:put($distribution, "post-shift", $post-shift))
};

(:~
 : truncation()
 : Modify the distribution to set a policy for handling values out of the
 : min/max range.
 :
 : @param $distribution: the distribution to modify
 : @param $truncation: the truncation policy
 :   resample: (the default) retry up to $rand:RESAMPLE-LIMIT times
 :     and then cap the value to minimum or maximum
 :   ceiling: cap the value to minimum or maximum
 :     Faster, but skews the distribution, perhaps badly
 :   drop: drop the value entirely (i.e.
return empty sequence)
 :     Fairly useless for most applications that want a definitive number
 :     of random values
 : @return the modified descriptor
 :)
declare function this:truncation(
  $distribution as map(xs:string, item()*),
  $truncation as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(map:put($distribution, "truncation", $truncation))
};

(:~
 : mean()
 : Modify the distribution to set a mean value. (normal, skewed)
 :
 : @param $distribution: the distribution to modify
 : @param $mean: mean value
 : @return the modified descriptor
 :)
declare function this:mean(
  $distribution as map(xs:string, item()*),
  $mean as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(map:put($distribution, "mean", $mean))
};

(:~
 : std()
 : Modify the distribution to set a standard deviation value (normal, skewed)
 :
 : @param $distribution: the distribution to modify
 : @param $std: standard deviation value
 : @return the modified descriptor
 :)
declare function this:std(
  $distribution as map(xs:string, item()*),
  $std as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(map:put($distribution, "std", $std))
};

(:~
 : skew()
 : Modify the distribution to set a skew value (skewed)
 :
 : @param $distribution: the distribution to modify
 : @param $skew: skew value
 : @return the modified descriptor
 :)
declare function this:skew(
  $distribution as map(xs:string, item()*),
  $skew as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(map:put($distribution, "skew", $skew))
};

(:~
 : p()
 : Modify the distribution to set a probability value (bernoulli, flip)
 :
 : @param $distribution: the distribution to modify
 : @param $p: the probability as a percent in [0,100]
 : @return the modified descriptor
 :)
declare function this:p(
  $distribution as map(xs:string, item()*),
  $p as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(map:put($distribution, "p", $p))
};

(:~
 : start()
 : Modify the distribution to set a start value (markov)
 :
 : @param $distribution: the distribution to modify
 : @param $start: the starting value of the chain
 : @return the modified descriptor
 :)
declare function this:start(
  $distribution as map(xs:string, item()*),
  $start as item()
) as map(xs:string, item()*)
{
  this:set-is-complex(map:put($distribution, "start", $start))
};

(:~
 : keys()
 : Modify the distribution to set a set of keys (for distributions that
 : define an index). Number of keys should match index range.
 :
 : @param $distribution: the distribution to modify
 : @param $keys: the keys
 : @return the modified descriptor
 :)
declare function this:keys(
  $distribution as map(xs:string, item()*),
  $keys as item()*
) as map(xs:string, item()*)
{
  this:set-is-complex(map:put($distribution, "keys", $keys))
};

(:~
 : is-complex()
 : Performance hint. If you use the constructors and modifiers in this
 : API you don't need this, but if you are constructing from a vanilla map,
 : and you know the distribution is or is not complex, set it so the
 : randomizer functions don't have to keep recalculating it.
 :
 : Don't be wrong here: things will break.
 :
 : @param $distribution: the distribution to modify
 : @param $is-complex: the value to store under "is_complex"
 : @return the descriptor with "is_complex" set as given (not recomputed)
 :)
declare function this:is-complex(
  $distribution as map(xs:string, item()*),
  $is-complex as xs:boolean
) as map(xs:string, item()*)
{
  map:put($distribution, "is_complex", $is-complex)
};

(:~
 : set-is-complex()
 : Another performance hint, but does the looking for you.
 : Look in algorithm descriptor to see if it needs special handling.
 : The constructors and setters in this API do this automatically; you
 : don't need to.
 :
 : The descriptor counts as complex when any entry other than "keys" holds
 : a single xs:QName or a single function item. Maps and arrays are function
 : items in XQuery 3.1, so a single map or array value also matches.
 :
 : @param $distribution: the distribution to inspect
 : @return the descriptor with "is_complex" set to the computed boolean
 :)
declare function this:set-is-complex(
  $distribution as map(xs:string, item()*)
) as map(xs:string, item()*)
{
  let $dynamic-keys :=
    for $name in map:keys($distribution)
    where not($name = "keys")
    let $value := $distribution($name)
    where ($value instance of xs:QName) or ($value instance of function(*))
    return $name
  return map:put($distribution, "is_complex", exists($dynamic-keys))
};

(:~
 : describe()
 : Dump the algorithm map in form suitable for debugging.
: Function values will have their QNames dumped (if possible)
 : If the map has a "describe" entry, that function is called with the map
 : and its result is returned instead of the default rendering.
 :
 : distribution: Distribution to use, one of "constant", "uniform", "normal",
 :   "skewed", "bernoulli", "flip", "zipf", "markov", "sums", "multimodal",
 :   "poisson", "binomial-poisson", "exponential", "binomial", "beta",
 :   "gamma", "binomial-beta"
 : min: minimum value (optional)
 : max: maximum value (optional)
 : max: n parameter (binomial, beta-binomial)
 : pre-multiplier: multiplier before min/max (optional)
 : post-multiplier: multiplier after min/max (optional)
 : post-shift: shift to add after min/max (optional)
 : cast: cast type (optional)
 : mean: mean of distribution (normal, skewed) (default=0)
 : std: standard deviation of distribution (normal, skewed) (default=mean)
 : skew: skew of distribution (skewed) (default=0)
 : p: probability (bernoulli, flip) (default=50)
 : sums: cumulative probability sums (zipf, markov)
 :   Single element, sequence of doubles
 : alpha: alpha parameter (zipf) (needed if no sums) (default=0)
 :   alpha parameter (beta, beta-binomial)
 : limit: number of sums (zipf) (needed if no sums) (default=1000)
 : start: index of starting symbol (markov) (default=uniform[1,dim])
 : dim: size of each dimension of Markov matrix (markov)
 : matrix: raw Markov matrix (used if sums not provided, not recommended)
 :   Single element, sequence of doubles
 : distributions: component distributions (multimodal)
 : lambda: lambda parameter (exponential)
 : k: k parameter (gamma)
 : theta: theta parameter (gamma)
 : beta: beta parameter (beta, beta-binomial)
 :
 : NOTE(review): the rendering opens with "[" but never emits a matching
 : closing bracket for the descriptor itself; confirm whether that is intended.
 :
 : @param $dist: the distribution descriptor to render
 : @return the rendered description
 :)
declare function this:describe($dist as map(xs:string,item()*)) as xs:string
{
  if (exists($dist("describe"))) then $dist("describe")($dist)
  else (
    let $has-components := map:contains($dist, "distributions")
    (: Rendered inline, in this order :)
    let $inline-keys := (
      "min", "max", "pre-multiplier", "post-multiplier", "post-shift",
      "cast", "mean", "std", "skew", "p", "alpha", "beta", "lambda",
      "k", "theta", "limit", "dim", "start"
    )
    (: Rendered with the $this:CRLF separator on both sides :)
    let $block-keys := ("sums", "matrix", "keys")
    return
      "["||$dist('distribution')||" "||
      string-join((
          for $k in $inline-keys return this:describe($k, $dist),
          for $k in $block-keys return this:describe($k, $dist, true())
        ),
        " "
      )||
      (if ($has-components) then "[" else "")||
      string-join(
        for $component in $dist("distributions")
        return this:describe($component),
        " "
      )||
      (if ($has-components) then "]" else "")||
      (if (map:contains($dist, "selector"))
       then "@"||this:describe($dist("selector"))
       else "")
  )
};

(: Separator placed around the "break" entries in describe(); despite the
 : name, the value as written here is a single space.
 :)
declare variable $this:CRLF as xs:string := " ";

(:~
 : Render one descriptor entry as key:value, or nothing if the key is absent.
 : The return type is optional because an absent key yields the empty
 : sequence; declaring exactly one xs:string made that case a type error.
 :
 : @param $k: the key to render
 : @param $distribution: the distribution descriptor
 : @param $break: whether to wrap the entry in $this:CRLF on both sides
 : @return the rendered entry, or the empty sequence when $k is not in the map
 :)
declare %private function this:describe(
  $k as xs:string,
  $distribution as map(xs:string,item()*),
  $break as xs:boolean
) as xs:string?
{
  if (map:contains($distribution, $k)) then (
    let $pad := if ($break) then $this:CRLF else ""
    let $v := map:get($distribution, $k)
    return $pad||$k||":"||util:quote($v)||$pad
  )
  else ()
};

(:~
 : Render one descriptor entry inline (no surrounding separator).
 :
 : @param $k: the key to render
 : @param $distribution: the distribution descriptor
 : @return the rendered entry, or the empty sequence when $k is not in the map
 :)
declare %private function this:describe(
  $k as xs:string,
  $distribution as map(xs:string, item()*)
) as xs:string?
{
  this:describe($k, $distribution, false())
};

(: distributions.xqy :)