Source: R/crtree.R
crtree.Rd
Classification and regression trees based on the rpart package
crtree( dataset, rvar, evar, type = "", lev = "", wts = "None", minsplit = 2, minbucket = round(minsplit/3), cp = 0.001, pcp = NA, nodes = NA, K = 10, seed = 1234, split = "gini", prior = NA, adjprob = TRUE, cost = NA, margin = NA, check = "", data_filter = "", envir = parent.frame())
Arguments
Argument | Description |
---|---|
dataset | Dataset |
rvar | The response variable in the model |
evar | Explanatory variables in the model |
type | Model type (i.e., "classification" or "regression") |
lev | The level in the response variable defined as _success_ |
wts | Weights to use in estimation |
minsplit | The minimum number of observations that must exist in a node in order for a split to be attempted. |
minbucket | The minimum number of observations in any terminal <leaf> node. If only one of minbucket or minsplit is specified, the code either sets minsplit to minbucket*3 or minbucket to minsplit/3, as appropriate. |
cp | Minimum proportion of root node deviance required for split (default = 0.001) |
pcp | Complexity parameter to use for pruning |
nodes | Maximum size of tree in number of nodes to return |
K | Number of folds used in cross-validation |
seed | Random seed used for cross-validation |
split | Splitting criterion to use (i.e., "gini" or "information") |
prior | Adjust the initial probability for the selected level (e.g., set to .5 in unbalanced samples) |
adjprob | Setting a prior will rescale the predicted probabilities. Set adjprob to TRUE to adjust the probabilities back to their original scale after estimation |
cost | Cost for each treatment (e.g., mailing) |
margin | Margin associated with a successful treatment (e.g., a purchase) |
check | Optional estimation parameters (e.g., "standardize") |
data_filter | Expression entered in, e.g., Data > View to filter the dataset in Radiant. The expression should be a string (e.g., "price > 10000") |
envir | Environment to extract data from |
Value
A list with all variables defined in crtree as an object of class crtree
Details
See https://radiant-rstats.github.io/docs/model/crtree.html for an example in Radiant
See also
summary.crtree
to summarize results
plot.crtree
to plot results
predict.crtree
for prediction
Examples
crtree(titanic, "survived", c("pclass", "sex"), lev = "Yes") %>% summary()
#> Classification tree
#> Data : titanic
#> Response variable : survived
#> Level : Yes in survived
#> Explanatory variables: pclass, sex
#> Complexity parameter : 0.001
#> Minimum observations : 2
#> Nr obs : 1,043
#>
#> node), split, n, loss, yval, (yprob)
#> * denotes terminal node
#>
#> 1) root 1043 425 No (0.40747843 0.59252157)
#>   2) sex=female 386 96 Yes (0.75129534 0.24870466)
#>     4) pclass=1st,2nd 234 16 Yes (0.93162393 0.06837607) *
#>     5) pclass=3rd 152 72 No (0.47368421 0.52631579) *
#>   3) sex=male 657 135 No (0.20547945 0.79452055) *
result <- crtree(titanic, "survived", c("pclass", "sex")) %>% summary()
#> Classification tree
#> Data : titanic
#> Response variable : survived
#> Level : Yes in survived
#> Explanatory variables: pclass, sex
#> Complexity parameter : 0.001
#> Minimum observations : 2
#> Nr obs : 1,043
#>
#> node), split, n, loss, yval, (yprob)
#> * denotes terminal node
#>
#> 1) root 1043 425 No (0.40747843 0.59252157)
#>   2) sex=female 386 96 Yes (0.75129534 0.24870466)
#>     4) pclass=1st,2nd 234 16 Yes (0.93162393 0.06837607) *
#>     5) pclass=3rd 152 72 No (0.47368421 0.52631579) *
#>   3) sex=male 657 135 No (0.20547945 0.79452055) *
result <- crtree(diamonds, "price", c("carat", "clarity"), type = "regression") %>% str()
#> List of 29#> $ model :List of 18#> ..$ frame :'data.frame':41 obs. of 8 variables:#> .. ..$ var : chr [1:41] "carat" "carat" "carat" "<leaf>" ...#> .. ..$ n : int [1:41] 3000 1935 1392 976 416 543 385 158 1065 715 ...#> .. ..$ wt : num [1:41] 3000 1935 1392 976 416 ...#> .. ..$ dev : num [1:41] 4.70e+10 2.37e+09 3.80e+08 5.37e+07 8.61e+07 ...#> .. ..$ yval : num [1:41] 3907 1618 1050 779 1687 ...#> .. ..$ complexity: num [1:41] 0.608124 0.03408 0.005122 0.00032 0.000518 ...#> .. ..$ ncompete : int [1:41] 1 1 1 0 0 1 0 0 1 1 ...#> .. ..$ nsurrogate: int [1:41] 1 1 0 0 0 0 0 0 0 0 ...#> ..$ where : Named int [1:3000] 4 4 4 4 4 5 8 8 5 14 ...#> .. ..- attr(*, "names")= chr [1:3000] "1" "2" "3" "4" ...#> ..$ call : language (function (formula, data, weights, subset, na.action = na.rpart, method, model = FALSE, x = FALSE, y = TRUE,| __truncated__ ...#> ..$ terms :Classes 'terms', 'formula' language price ~ carat + clarity#> .. .. ..- attr(*, "variables")= language list(price, carat, clarity)#> .. .. ..- attr(*, "factors")= int [1:3, 1:2] 0 1 0 0 0 1#> .. .. .. ..- attr(*, "dimnames")=List of 2#> .. .. .. .. ..$ : chr [1:3] "price" "carat" "clarity"#> .. .. .. .. ..$ : chr [1:2] "carat" "clarity"#> .. .. ..- attr(*, "term.labels")= chr [1:2] "carat" "clarity"#> .. .. ..- attr(*, "order")= int [1:2] 1 1#> .. .. ..- attr(*, "intercept")= int 1#> .. .. ..- attr(*, "response")= int 1#> .. .. ..- attr(*, ".Environment")=<environment: 0x10a71ec0> #> .. .. ..- attr(*, "predvars")= language list(price, carat, clarity)#> .. .. ..- attr(*, "dataClasses")= Named chr [1:3] "numeric" "numeric" "factor"#> .. .. .. ..- attr(*, "names")= chr [1:3] "price" "carat" "clarity"#> ..$ cptable : num [1:21, 1:5] 0.6081 0.1838 0.0341 0.0299 0.0292 ...#> .. ..- attr(*, "dimnames")=List of 2#> .. .. ..$ : chr [1:21] "1" "2" "3" "4" ...#> .. .. ..$ : chr [1:5] "CP" "nsplit" "rel error" "xerror" ...#> ..$ method : chr "anova"#> ..$ parms : NULL#> ..$ control :List of 9#> .. 
..$ minsplit : num 2#> .. ..$ minbucket : num 1#> .. ..$ cp : num 0.001#> .. ..$ maxcompete : int 4#> .. ..$ maxsurrogate : int 5#> .. ..$ usesurrogate : int 2#> .. ..$ surrogatestyle: int 0#> .. ..$ maxdepth : int 30#> .. ..$ xval : num 10#> ..$ functions :List of 2#> .. ..$ summary:function (yval, dev, wt, ylevel, digits) #> .. ..$ text :function (yval, dev, wt, ylevel, digits, n, use.n) #> ..$ numresp : int 1#> ..$ splits : num [1:44, 1:5] 3000 3000 0 1935 1935 ...#> .. ..- attr(*, "dimnames")=List of 2#> .. .. ..$ : chr [1:44] "carat" "clarity" "clarity" "carat" ...#> .. .. ..$ : chr [1:5] "count" "ncat" "improve" "index" ...#> ..$ csplit : int [1:21, 1:8] 1 3 3 3 2 1 1 1 1 1 ...#> ..$ variable.importance: Named num [1:2] 4.10e+10 5.97e+09#> .. ..- attr(*, "names")= chr [1:2] "carat" "clarity"#> ..$ y : Named int [1:3000] 580 650 630 706 1080 3082 3328 4229 1895 3546 ...#> .. ..- attr(*, "names")= chr [1:3000] "1" "2" "3" "4" ...#> ..$ ordered : Named logi [1:2] FALSE FALSE#> .. ..- attr(*, "names")= chr [1:2] "carat" "clarity"#> ..$ residuals : Named num [1:3000] -199 -129 -149 -73 301 ...#> .. ..- attr(*, "names")= chr [1:3000] "1" "2" "3" "4" ...#> ..$ model : tibble [3,000 × 3] (S3: tbl_df/tbl/data.frame)#> .. ..$ price : int [1:3000] 580 650 630 706 1080 3082 3328 4229 1895 3546 ...#> .. ..$ carat : num [1:3000] 0.32 0.34 0.3 0.35 0.4 0.6 0.88 0.93 0.51 1.01 ...#> .. ..$ clarity: Factor w/ 8 levels "I1","SI2","SI1",..: 5 3 4 6 4 7 3 3 6 2 ...#> .. ..- attr(*, "description")= chr "## Diamond prices\n\nPrices of 3,000 round cut diamonds\n\n### Description\n\nA dataset containing the prices a"| __truncated__#> ..$ var_types : Named chr [1:3] "integer" "numeric" "factor"#> .. ..- attr(*, "names")= chr [1:3] "price" "carat" "clarity"#> ..- attr(*, "xlevels")=List of 1#> .. ..$ clarity: chr [1:8] "I1" "SI2" "SI1" "VS2" ...#> ..- attr(*, "class")= chr "rpart"#> $ crtree_input:List of 6#> ..$ formula:Class 'formula' language price ~ .#> .. .. 
..- attr(*, ".Environment")=<environment: 0x10a71ec0> #> ..$ data : tibble [3,000 × 3] (S3: tbl_df/tbl/data.frame)#> .. ..$ price : int [1:3000] 580 650 630 706 1080 3082 3328 4229 1895 3546 ...#> .. ..$ carat : num [1:3000] 0.32 0.34 0.3 0.35 0.4 0.6 0.88 0.93 0.51 1.01 ...#> .. ..$ clarity: Factor w/ 8 levels "I1","SI2","SI1",..: 5 3 4 6 4 7 3 3 6 2 ...#> .. ..- attr(*, "description")= chr "## Diamond prices\n\nPrices of 3,000 round cut diamonds\n\n### Description\n\nA dataset containing the prices a"| __truncated__#> ..$ method : chr "anova"#> ..$ parms :List of 1#> .. ..$ split: chr "gini"#> ..$ weights: NULL#> ..$ control:List of 9#> .. ..$ minsplit : num 2#> .. ..$ minbucket : num 1#> .. ..$ cp : num 0.001#> .. ..$ maxcompete : int 4#> .. ..$ maxsurrogate : int 5#> .. ..$ usesurrogate : int 2#> .. ..$ surrogatestyle: int 0#> .. ..$ maxdepth : int 30#> .. ..$ xval : num 10#> $ parms :List of 1#> ..$ split: chr "gini"#> $ control :List of 9#> ..$ minsplit : num 2#> ..$ minbucket : num 1#> ..$ cp : num 0.001#> ..$ maxcompete : int 4#> ..$ maxsurrogate : int 5#> ..$ usesurrogate : int 2#> ..$ surrogatestyle: int 0#> ..$ maxdepth : int 30#> ..$ xval : num 10#> $ form : chr "price ~ . "#> $ method : chr "anova"#> $ rv : int [1:3000] 580 650 630 706 1080 3082 3328 4229 1895 3546 ...#> $ not_vary : chr(0) #> $ df_name : chr "diamonds"#> $ vars : chr [1:2] "carat" "clarity"#> $ rvar : chr "price"#> $ evar : chr [1:2] "carat" "clarity"#> $ type : chr "regression"#> $ lev : chr ""#> $ wts : NULL#> $ minsplit : num 2#> $ minbucket : num 1#> $ cp : num 0.001#> $ pcp : logi NA#> $ nodes : logi NA#> $ K : num 10#> $ seed : num 1234#> $ split : chr "gini"#> $ prior : logi NA#> $ adjprob : logi TRUE#> $ cost : logi NA#> $ margin : logi NA#> $ check : chr ""#> $ data_filter : chr ""#> - attr(*, "class")= chr [1:3] "crtree" "model" "list"