Classification and regression trees based on the rpart package — crtree (2024)

Source: R/crtree.R


Classification and regression trees based on the rpart package

crtree( dataset, rvar, evar, type = "", lev = "", wts = "None", minsplit = 2, minbucket = round(minsplit/3), cp = 0.001, pcp = NA, nodes = NA, K = 10, seed = 1234, split = "gini", prior = NA, adjprob = TRUE, cost = NA, margin = NA, check = "", data_filter = "", envir = parent.frame())





The response variable in the model


Explanatory variables in the model


Model type (i.e., "classification" or "regression")


The level in the response variable defined as _success_


Weights to use in estimation


The minimum number of observations that must exist in a node in order for a split to be attempted.


The minimum number of observations in any terminal <leaf> node. If only one of minbucket or minsplit is specified, the code either sets minsplit to minbucket*3 or minbucket to minsplit/3, as appropriate.


Minimum proportion of root node deviance required for split (default = 0.001)


Complexity parameter to use for pruning


Maximum size of tree in number of nodes to return


Number of folds used in cross-validation


Random seed used for cross-validation


Splitting criterion to use (i.e., "gini" or "information")


Adjust the initial probability for the selected level (e.g., set to .5 in unbalanced samples)


Setting a prior will rescale the predicted probabilities. Set adjprob to TRUE to adjust the probabilities back to their original scale after estimation


Cost for each treatment (e.g., mailing)


Margin associated with a successful treatment (e.g., a purchase)


Optional estimation parameters (e.g., "standardize")


Expression entered in, e.g., Data > View to filter the dataset in Radiant. The expression should be a string (e.g., "price > 10000")


Environment to extract data from


A list with all variables defined in crtree as an object of class crtree


See https://radiant-rstats.github.io/docs/model/crtree.html for an example in Radiant

See also

summary.crtree to summarize results

plot.crtree to plot results

predict.crtree for prediction


crtree(titanic, "survived", c("pclass", "sex"), lev = "Yes") %>% summary()

#> Classification tree
#> Data : titanic
#> Response variable : survived
#> Level : Yes in survived
#> Explanatory variables: pclass, sex
#> Complexity parameter : 0.001
#> Minimum observations : 2
#> Nr obs : 1,043
#>
#> node), split, n, loss, yval, (yprob)
#> * denotes terminal node
#>
#> 1) root 1043 425 No (0.40747843 0.59252157)
#>   2) sex=female 386 96 Yes (0.75129534 0.24870466)
#>     4) pclass=1st,2nd 234 16 Yes (0.93162393 0.06837607) *
#>     5) pclass=3rd 152 72 No (0.47368421 0.52631579) *
#>   3) sex=male 657 135 No (0.20547945 0.79452055) *

result <- crtree(titanic, "survived", c("pclass", "sex")) %>% summary()

#> Classification tree
#> Data : titanic
#> Response variable : survived
#> Level : Yes in survived
#> Explanatory variables: pclass, sex
#> Complexity parameter : 0.001
#> Minimum observations : 2
#> Nr obs : 1,043
#>
#> node), split, n, loss, yval, (yprob)
#> * denotes terminal node
#>
#> 1) root 1043 425 No (0.40747843 0.59252157)
#>   2) sex=female 386 96 Yes (0.75129534 0.24870466)
#>     4) pclass=1st,2nd 234 16 Yes (0.93162393 0.06837607) *
#>     5) pclass=3rd 152 72 No (0.47368421 0.52631579) *
#>   3) sex=male 657 135 No (0.20547945 0.79452055) *

result <- crtree(diamonds, "price", c("carat", "clarity"), type = "regression") %>% str()

#> List of 29#> $ model :List of 18#> ..$ frame :'data.frame':41 obs. of 8 variables:#> .. ..$ var : chr [1:41] "carat" "carat" "carat" "<leaf>" ...#> .. ..$ n : int [1:41] 3000 1935 1392 976 416 543 385 158 1065 715 ...#> .. ..$ wt : num [1:41] 3000 1935 1392 976 416 ...#> .. ..$ dev : num [1:41] 4.70e+10 2.37e+09 3.80e+08 5.37e+07 8.61e+07 ...#> .. ..$ yval : num [1:41] 3907 1618 1050 779 1687 ...#> .. ..$ complexity: num [1:41] 0.608124 0.03408 0.005122 0.00032 0.000518 ...#> .. ..$ ncompete : int [1:41] 1 1 1 0 0 1 0 0 1 1 ...#> .. ..$ nsurrogate: int [1:41] 1 1 0 0 0 0 0 0 0 0 ...#> ..$ where : Named int [1:3000] 4 4 4 4 4 5 8 8 5 14 ...#> .. ..- attr(*, "names")= chr [1:3000] "1" "2" "3" "4" ...#> ..$ call : language (function (formula, data, weights, subset, na.action = na.rpart, method, model = FALSE, x = FALSE, y = TRUE,| __truncated__ ...#> ..$ terms :Classes 'terms', 'formula' language price ~ carat + clarity#> .. .. ..- attr(*, "variables")= language list(price, carat, clarity)#> .. .. ..- attr(*, "factors")= int [1:3, 1:2] 0 1 0 0 0 1#> .. .. .. ..- attr(*, "dimnames")=List of 2#> .. .. .. .. ..$ : chr [1:3] "price" "carat" "clarity"#> .. .. .. .. ..$ : chr [1:2] "carat" "clarity"#> .. .. ..- attr(*, "term.labels")= chr [1:2] "carat" "clarity"#> .. .. ..- attr(*, "order")= int [1:2] 1 1#> .. .. ..- attr(*, "intercept")= int 1#> .. .. ..- attr(*, "response")= int 1#> .. .. ..- attr(*, ".Environment")=<environment: 0x10a71ec0> #> .. .. ..- attr(*, "predvars")= language list(price, carat, clarity)#> .. .. ..- attr(*, "dataClasses")= Named chr [1:3] "numeric" "numeric" "factor"#> .. .. .. ..- attr(*, "names")= chr [1:3] "price" "carat" "clarity"#> ..$ cptable : num [1:21, 1:5] 0.6081 0.1838 0.0341 0.0299 0.0292 ...#> .. ..- attr(*, "dimnames")=List of 2#> .. .. ..$ : chr [1:21] "1" "2" "3" "4" ...#> .. .. ..$ : chr [1:5] "CP" "nsplit" "rel error" "xerror" ...#> ..$ method : chr "anova"#> ..$ parms : NULL#> ..$ control :List of 9#> .. 
..$ minsplit : num 2#> .. ..$ minbucket : num 1#> .. ..$ cp : num 0.001#> .. ..$ maxcompete : int 4#> .. ..$ maxsurrogate : int 5#> .. ..$ usesurrogate : int 2#> .. ..$ surrogatestyle: int 0#> .. ..$ maxdepth : int 30#> .. ..$ xval : num 10#> ..$ functions :List of 2#> .. ..$ summary:function (yval, dev, wt, ylevel, digits) #> .. ..$ text :function (yval, dev, wt, ylevel, digits, n, use.n) #> ..$ numresp : int 1#> ..$ splits : num [1:44, 1:5] 3000 3000 0 1935 1935 ...#> .. ..- attr(*, "dimnames")=List of 2#> .. .. ..$ : chr [1:44] "carat" "clarity" "clarity" "carat" ...#> .. .. ..$ : chr [1:5] "count" "ncat" "improve" "index" ...#> ..$ csplit : int [1:21, 1:8] 1 3 3 3 2 1 1 1 1 1 ...#> ..$ variable.importance: Named num [1:2] 4.10e+10 5.97e+09#> .. ..- attr(*, "names")= chr [1:2] "carat" "clarity"#> ..$ y : Named int [1:3000] 580 650 630 706 1080 3082 3328 4229 1895 3546 ...#> .. ..- attr(*, "names")= chr [1:3000] "1" "2" "3" "4" ...#> ..$ ordered : Named logi [1:2] FALSE FALSE#> .. ..- attr(*, "names")= chr [1:2] "carat" "clarity"#> ..$ residuals : Named num [1:3000] -199 -129 -149 -73 301 ...#> .. ..- attr(*, "names")= chr [1:3000] "1" "2" "3" "4" ...#> ..$ model : tibble [3,000 × 3] (S3: tbl_df/tbl/data.frame)#> .. ..$ price : int [1:3000] 580 650 630 706 1080 3082 3328 4229 1895 3546 ...#> .. ..$ carat : num [1:3000] 0.32 0.34 0.3 0.35 0.4 0.6 0.88 0.93 0.51 1.01 ...#> .. ..$ clarity: Factor w/ 8 levels "I1","SI2","SI1",..: 5 3 4 6 4 7 3 3 6 2 ...#> .. ..- attr(*, "description")= chr "## Diamond prices\n\nPrices of 3,000 round cut diamonds\n\n### Description\n\nA dataset containing the prices a"| __truncated__#> ..$ var_types : Named chr [1:3] "integer" "numeric" "factor"#> .. ..- attr(*, "names")= chr [1:3] "price" "carat" "clarity"#> ..- attr(*, "xlevels")=List of 1#> .. ..$ clarity: chr [1:8] "I1" "SI2" "SI1" "VS2" ...#> ..- attr(*, "class")= chr "rpart"#> $ crtree_input:List of 6#> ..$ formula:Class 'formula' language price ~ .#> .. .. 
..- attr(*, ".Environment")=<environment: 0x10a71ec0> #> ..$ data : tibble [3,000 × 3] (S3: tbl_df/tbl/data.frame)#> .. ..$ price : int [1:3000] 580 650 630 706 1080 3082 3328 4229 1895 3546 ...#> .. ..$ carat : num [1:3000] 0.32 0.34 0.3 0.35 0.4 0.6 0.88 0.93 0.51 1.01 ...#> .. ..$ clarity: Factor w/ 8 levels "I1","SI2","SI1",..: 5 3 4 6 4 7 3 3 6 2 ...#> .. ..- attr(*, "description")= chr "## Diamond prices\n\nPrices of 3,000 round cut diamonds\n\n### Description\n\nA dataset containing the prices a"| __truncated__#> ..$ method : chr "anova"#> ..$ parms :List of 1#> .. ..$ split: chr "gini"#> ..$ weights: NULL#> ..$ control:List of 9#> .. ..$ minsplit : num 2#> .. ..$ minbucket : num 1#> .. ..$ cp : num 0.001#> .. ..$ maxcompete : int 4#> .. ..$ maxsurrogate : int 5#> .. ..$ usesurrogate : int 2#> .. ..$ surrogatestyle: int 0#> .. ..$ maxdepth : int 30#> .. ..$ xval : num 10#> $ parms :List of 1#> ..$ split: chr "gini"#> $ control :List of 9#> ..$ minsplit : num 2#> ..$ minbucket : num 1#> ..$ cp : num 0.001#> ..$ maxcompete : int 4#> ..$ maxsurrogate : int 5#> ..$ usesurrogate : int 2#> ..$ surrogatestyle: int 0#> ..$ maxdepth : int 30#> ..$ xval : num 10#> $ form : chr "price ~ . "#> $ method : chr "anova"#> $ rv : int [1:3000] 580 650 630 706 1080 3082 3328 4229 1895 3546 ...#> $ not_vary : chr(0) #> $ df_name : chr "diamonds"#> $ vars : chr [1:2] "carat" "clarity"#> $ rvar : chr "price"#> $ evar : chr [1:2] "carat" "clarity"#> $ type : chr "regression"#> $ lev : chr ""#> $ wts : NULL#> $ minsplit : num 2#> $ minbucket : num 1#> $ cp : num 0.001#> $ pcp : logi NA#> $ nodes : logi NA#> $ K : num 10#> $ seed : num 1234#> $ split : chr "gini"#> $ prior : logi NA#> $ adjprob : logi TRUE#> $ cost : logi NA#> $ margin : logi NA#> $ check : chr ""#> $ data_filter : chr ""#> - attr(*, "class")= chr [1:3] "crtree" "model" "list"

