Оболочка base R reshape для простоты использования

Общепризнано, что команда base reshape R является быстрой и мощной, но имеет жалкий синтаксис. Поэтому я написал небольшую обертку вокруг него, которую добавлю в следующий выпуск пакета taRifx. Однако прежде чем я это сделаю, я хочу добиться улучшений.

Вот моя версия с обновлениями от @RichieCotton:

# reshapeasy: Version of reshape with way, way better syntax
 # Written with the help of the StackOverflow R community
 # x is a data.frame to be reshaped
 # direction is "wide" or "long"
 # vars are the names of the (stubs of) the variables to be reshaped (if omitted, defaults to everything not in id or vary)
 # id are the names of the variables that identify unique observations
 # vary is the variable that varies.  Going to wide this variable will cease to exist.  Going to long it will be created.
 # omit is a vector of characters which are to be omitted if found at the end of variable names (e.g. price_1 becomes price in long)
 # ... are options to be passed to stats::reshape
reshapeasy <- function( data, direction, id=(sapply(data,is.factor) | sapply(data,is.character)), vary=sapply(data,is.numeric), omit=c("_","."), vars=NULL, ... ) {
  if(direction=="wide") data <- stats::reshape( data=data, direction=direction, idvar=id, timevar=vary, ... )
  if(direction=="long") {
    varying <- which(!(colnames(data) %in% id))
    data <- stats::reshape( data=data, direction=direction, idvar=id, varying=varying, timevar=vary, ... )
  }
  colnames(data) <- gsub( paste("[",paste(omit,collapse="",sep=""),"]$",sep=""), "", colnames(data) )
  return(data)
}

Обратите внимание, что вы можете переходить от широкого к длинному, не меняя параметры, кроме направления. Для меня это ключ к удобству использования.

Я буду рад указать в файлах справки по функциям любые существенные улучшения, если вы сообщите мне свою информацию в чате или по электронной почте.

Усовершенствования могут коснуться следующих областей:

  • Именование функции и ее аргументов.
  • Сделать ее более общей (в настоящее время она обрабатывает довольно специфический случай, который, как мне кажется, является наиболее распространенным, но еще не исчерпаны возможности stats::reshape)
  • Улучшения кода

Примеры

Демонстрационные данные

x.wide <- structure(list(surveyNum = 1:6, pio_1 = structure(c(2L, 2L, 1L, 
2L, 1L, 1L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1", 
"2"), class = "factor"), pio_2 = structure(c(2L, 1L, 2L, 1L, 
2L, 2L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1", 
"2"), class = "factor"), pio_3 = structure(c(2L, 2L, 1L, 1L, 
2L, 1L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1", 
"2"), class = "factor"), caremgmt_1 = structure(c(2L, 1L, 1L, 
2L, 1L, 2L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1", 
"2"), class = "factor"), caremgmt_2 = structure(c(1L, 2L, 2L, 
2L, 2L, 1L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1", 
"2"), class = "factor"), caremgmt_3 = structure(c(1L, 2L, 1L, 
2L, 1L, 1L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1", 
"2"), class = "factor"), prev_1 = structure(c(1L, 2L, 2L, 1L, 
1L, 2L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1", 
"2"), class = "factor"), prev_2 = structure(c(2L, 2L, 1L, 2L, 
1L, 1L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1", 
"2"), class = "factor"), prev_3 = structure(c(2L, 1L, 2L, 2L, 
1L, 1L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1", 
"2"), class = "factor"), price_1 = structure(c(2L, 1L, 2L, 5L, 
3L, 4L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1", 
"2", "3", "4", "5", "6"), class = "factor"), price_2 = structure(c(6L, 
5L, 5L, 4L, 4L, 2L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1", 
"2", "3", "4", "5", "6"), class = "factor"), price_3 = structure(c(3L, 
5L, 2L, 5L, 4L, 5L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1", 
"2", "3", "4", "5", "6"), class = "factor")), .Names = c("surveyNum", 
"pio_1", "pio_2", "pio_3", "caremgmt_1", "caremgmt_2", "caremgmt_3", 
"prev_1", "prev_2", "prev_3", "price_1", "price_2", "price_3"
), idvars = "surveyNum", rdimnames = list(structure(list(surveyNum = 1:24), .Names = "surveyNum", row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24"
), class = "data.frame"), structure(list(variable = structure(c(1L, 
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("pio", 
"caremgmt", "prev", "price"), class = "factor"), .id = c(1L, 
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L)), .Names = c("variable", 
".id"), row.names = c("pio_1", "pio_2", "pio_3", "caremgmt_1", 
"caremgmt_2", "caremgmt_3", "prev_1", "prev_2", "prev_3", "price_1", 
"price_2", "price_3"), class = "data.frame")), row.names = c(NA, 
6L), class = c("cast_df", "data.frame"))

x.long <- structure(list(.id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), pio = structure(c(2L, 
2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 
1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 
1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 
1L, 2L, 2L, 1L, 2L, 1L, 1L), .Label = c("1", "2"), class = "factor"), 
    caremgmt = structure(c(2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 
    2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 
    1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 
    1L, 2L, 2L), .Label = c("1", "2"), class = "factor"), prev = structure(c(1L, 
    2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 
    1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 
    2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 
    2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L), .Label = c("1", 
    "2"), class = "factor"), price = structure(c(2L, 1L, 2L, 
    5L, 3L, 4L, 1L, 5L, 4L, 3L, 1L, 2L, 6L, 6L, 5L, 4L, 6L, 3L, 
    5L, 6L, 3L, 1L, 2L, 4L, 3L, 5L, 2L, 5L, 4L, 5L, 6L, 6L, 4L, 
    6L, 4L, 1L, 2L, 3L, 1L, 2L, 2L, 5L, 1L, 6L, 1L, 3L, 4L, 3L, 
    6L, 5L, 5L, 4L, 4L, 2L, 2L, 2L, 6L, 3L, 1L, 4L, 4L, 5L, 1L, 
    3L, 6L, 1L, 3L, 5L, 1L, 3L, 6L, 2L), .Label = c("1", "2", 
    "3", "4", "5", "6"), class = "factor"), surveyNum = c(1L, 
    2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 
    15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 1L, 2L, 
    3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 
    16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 1L, 2L, 3L, 
    4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 
    17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L)), .Names = c(".id", 
"pio", "caremgmt", "prev", "price", "surveyNum"), row.names = c(NA, 
-72L), class = "data.frame")

Примеры

> x.wide
  surveyNum pio_1 pio_2 pio_3 caremgmt_1 caremgmt_2 caremgmt_3 prev_1 prev_2 prev_3 price_1 price_2 price_3
1         1     2     2     2          2          1          1      1      2      2       2       6       3
2         2     2     1     2          1          2          2      2      2      1       1       5       5
3         3     1     2     1          1          2          1      2      1      2       2       5       2
4         4     2     1     1          2          2          2      1      2      2       5       4       5
5         5     1     2     2          1          2          1      1      1      1       3       4       4
6         6     1     2     1          2          1          1      2      1      1       4       2       5
> reshapeasy( x.wide, "long", NULL, id="surveyNum", vary="id", sep="_" )
    surveyNum id pio caremgmt prev price
1.1         1  1   2        2    1     2
2.1         2  1   2        1    2     1
3.1         3  1   1        1    2     2
4.1         4  1   2        2    1     5
5.1         5  1   1        1    1     3
6.1         6  1   1        2    2     4
1.2         1  2   2        1    2     6
2.2         2  2   1        2    2     5
3.2         3  2   2        2    1     5
4.2         4  2   1        2    2     4
5.2         5  2   2        2    1     4
6.2         6  2   2        1    1     2
1.3         1  3   2        1    2     3
2.3         2  3   2        2    1     5
3.3         3  3   1        1    2     2
4.3         4  3   1        2    2     5
5.3         5  3   2        1    1     4
6.3         6  3   1        1    1     5

> head(x.long)
  .id pio caremgmt prev price surveyNum
1   1   2        2    1     2         1
2   1   2        1    2     1         2
3   1   1        1    2     2         3
4   1   2        2    1     5         4
5   1   1        1    1     3         5
6   1   1        2    2     4         6

> head(reshapeasy( x.long, direction="wide", id="surveyNum", vary=".id" ))
  surveyNum pio.1 caremgmt.1 prev.1 price.1 pio.3 caremgmt.3 prev.3 price.3 pio.2 caremgmt.2 prev.2 price.2
1         1     2          2      1       2     2          1      2       3     2          1      2       6
2         2     2          1      2       1     2          2      1       5     1          2      2       5
3         3     1          1      2       2     1          1      2       2     2          2      1       5
4         4     2          2      1       5     1          2      2       5     1          2      2       4
5         5     1          1      1       3     2          1      1       4     2          2      1       4
6         6     1          2      2       4     1          1      1       5     2          1      1       2
14
задан Ari B. Friedman 9 April 2012 в 11:37
поделиться