Untitled diff
39 removals
115 lines
42 additions
121 lines
<!--head
<!--head
Title: Correlations
Title: Korrelációs együtthatók
Author: Daróczi Gergely
Author: Daróczi Gergely
Email: gergely@snowl.net
Email: gergely@snowl.net
Description: This template will return the correlation matrix of supplied numerical variables.
Description: Folytonos változók közötti lineáris összefüggések vizsgálata. ## TODO: update
Data required: TRUE
Data required: TRUE
Example: rapport('correlations', data=ius2008, vars=c('age', 'edu'))
Strict: TRUE
rapport('correlations', data=ius2008, vars=c('age', 'edu', 'leisure'))
Example: rapport('i18n/hu/correlations', data=ius2008, vars=c('age', 'edu'))
rapport('correlations', data=mtcars, vars=c('mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb'))
rapport('i18n/hu/correlations', data=ius2008, vars=c('age', 'edu', 'leisure'))
rapport('i18n/hu/correlations', data=mtcars, vars=c('mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb'))
vars | *numeric[2,50]| Variable | Numerical variables
vars | *numeric[2,50]| Változók | Folytonos változók
cor.matrix | TRUE | Correlation matrix | Show correlation matrix (numbers)?
cor.matrix | TRUE | Korrelációs mátrix | Korrelációs mátrix hozzáadása
cor.plot | TRUE | Scatterplot matrix | Show scatterplot matrix (image)?
cor.plot | TRUE | Pontdiagram | Pontdiagram hozzáadása
quick.plot | TRUE | Using a sample for plotting | If set to TRUE, the scatterplot matrix is drawn from a random sample of at most 1000 cases, to avoid rendering millions of points.
quick.plot | TRUE | Minta ábrázolása | A teljes adatbázis helyett egy maximum 1000 fős minta kerül ábrázolásra.
head-->
head-->
# Variable description
<%
## set the Hungarian copula option and return NULL so that nothing gets exported to the report
options('p.copula' = 'és'); NULL
%>
<%=length(vars)%> variables provided.
# Változó-információk
<%=length(vars)%> változó vizsgálata:
<%=
<%=
cm <- cor(vars, use = 'complete.obs')
cm <- cor(vars, use = 'complete.obs')
diag(cm) <- NA
diag(cm) <- NA
%>
%>
<%if (length(vars) >2 ) {%>
<%if (length(vars) >2 ) {%>
The highest correlation coefficient (<%=max(cm, na.rm=T)%>) is between <%=row.names(which(cm == max(cm, na.rm=T), arr.ind=T))[1:2]%> and the lowest (<%=min(cm, na.rm=T)%>) is between <%=row.names(which(cm == min(cm, na.rm=T), arr.ind=T))[1:2]%>. It seems that the strongest association (r=<%=cm[which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T)][1]%>) is between <%=row.names(which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T))[1:2]%>.
A legmagasabb korrelációs együtthatót (<%=max(cm, na.rm=T)%>) a(z) <%=row.names(which(cm == max(cm, na.rm=T), arr.ind=T))[1:2]%>, és a legalacsonyabb értéket (<%=min(cm, na.rm=T)%>) a(z) <%=row.names(which(cm == min(cm, na.rm=T), arr.ind=T))[1:2]%> változók között találjuk. Úgy tűnik, hogy a legerősebb kapcsolat (r=<%=cm[which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T)][1]%>) a(z) <%=row.names(which(abs(cm) == max(abs(cm), na.rm=T), arr.ind=T))[1:2]%> változók között található.
<%}%>
<%}%>
<%
<%
cm[upper.tri(cm)] <- NA
cm[upper.tri(cm)] <- NA
h <- which((cm > 0.7) | (cm < -0.7), arr.ind=T)
h <- which((cm > 0.7) | (cm < -0.7), arr.ind=T)
if (nrow(h) > 0) {
if (nrow(h) > 0) {
%>
%>
Highly correlated (r < -0.7 or r > 0.7) variables:
Erős összefüggést mutató (r < -0.7 vagy r > 0.7) változók:
<%=paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n')%>
<%=paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n')%>
<%} else {%>
<%} else {%>
There are no highly correlated (r < -0.7 or r > 0.7) variables.
Nincsenek erős összefüggést mutató (r < -0.7 vagy r > 0.7) változók.
<%}%>
<%}%>
<%
<%
h <- which((cm < 0.2)&(cm > -0.2), arr.ind=T)
h <- which((cm < 0.2)&(cm > -0.2), arr.ind=T)
if (nrow(h) > 0) {
if (nrow(h) > 0) {
%>
%>
Uncorrelated (-0.2 < r < 0.2) variables:
Korrelálatlan (-0.2 < r < 0.2) változók:
<%=
<%=
if (nrow(h) > 0)
if (nrow(h) > 0)
paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n')
paste(pander.return(lapply(1:nrow(h), function(i) paste0(p(c(rownames(cm)[h[i,1]], colnames(cm)[h[i,2]])), ' (', round(cm[h[i, 1], h[i, 2]], 2), ')'))), collapse = '\n')
%>
%>
<%} else {%>
<%} else {%>
There are no uncorrelated (-0.2 < r < 0.2) variables.
Nincsenek korrelálatlan (-0.2 < r < 0.2) változók.
<%}%>
<%}%>
## <%=if (cor.matrix) 'Correlation matrix'%>
## <%=if (cor.matrix) 'Korrelációs mátrix'%>
<%=
<%=
if (cor.matrix) {
if (cor.matrix) {
set.caption('Correlation matrix')
set.caption('Correlation matrix')
cm <- round(cor(vars, use = 'complete.obs'), 4)
cm <- round(cor(vars, use = 'complete.obs'), 4)
d <- attributes(cm)
d <- attributes(cm)
for (row in attr(cm, 'dimnames')[[1]])
for (row in attr(cm, 'dimnames')[[1]])
for (col in attr(cm, 'dimnames')[[2]]) {
for (col in attr(cm, 'dimnames')[[2]]) {
test.p <- cor.test(vars[, row], vars[, col])$p.value
test.p <- cor.test(vars[, row], vars[, col])$p.value
cm[row, col] <- paste(cm[row, col], ' ', ifelse(test.p > 0.05, '', ifelse(test.p > 0.01, ' ★', ifelse(test.p > 0.001, ' ★★', ' ★★★'))), sep='')
cm[row, col] <- paste(cm[row, col], ' ', ifelse(test.p > 0.05, '', ifelse(test.p > 0.01, ' ★', ifelse(test.p > 0.001, ' ★★', ' ★★★'))), sep='')
}
}
diag(cm) <- ''
diag(cm) <- ''
set.alignment('centre', 'right')
set.alignment('centre', 'right')
as.data.frame(cm)
as.data.frame(cm)
}
}
%>
%>
Where the stars represent the [significance levels](http://en.wikipedia.org/wiki/Statistical_significance) of the bivariate correlation coefficients: one star for `0.05`, two for `0.01` and three for `0.001`.
Ahol a csillagok száma a [szignifikancia szintet](http://en.wikipedia.org/wiki/Statistical_significance) jelöli: egy csillag `0,05`, kettő `0,01` és három csillag `0,001` p értéknél.
<%=
<%=
if (cor.plot) {
if (cor.plot) {
labels <- lapply(vars, rp.name)
labels <- lapply(vars, rp.name)
if (quick.plot)
if (quick.plot)
if (nrow(vars) > 1000)
if (nrow(vars) > 1000)
vars <- vars[sample(1:nrow(vars), size = 1000), ]
vars <- vars[sample(1:nrow(vars), size = 1000), ]
## custom panels
## custom panels
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
## forked from ?pairs
## forked from ?pairs
par(usr = c(0, 1, 0, 1))
par(usr = c(0, 1, 0, 1))
r <- cor(x, y, use = 'complete.obs')
r <- cor(x, y, use = 'complete.obs')
txt <- format(c(r, 0.123456789), digits = digits)[1]
txt <- format(c(r, 0.123456789), digits = digits)[1]
txt <- paste(prefix, txt, sep = "")
txt <- paste(prefix, txt, sep = "")
if(missing(cex.cor))
if(missing(cex.cor))
cex <- 0.8/strwidth(txt)
cex <- 0.8/strwidth(txt)
test <- cor.test(x,y)
test <- cor.test(x,y)
Signif <- symnum(test$p.value, corr = FALSE, na = FALSE,
Signif <- symnum(test$p.value, corr = FALSE, na = FALSE,
cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
symbols = c("***", "**", "*", ".", " "))
symbols = c("***", "**", "*", ".", " "))
text(0.5, 0.5, txt, cex = cex * abs(r) * 1.5)
text(0.5, 0.5, txt, cex = cex * abs(r) * 1.5)
text(.8, .8, Signif, cex = cex, col = 2)
text(.8, .8, Signif, cex = cex, col = 2)
}
}
## plot
## plot
set.caption(sprintf('Scatterplot matrix%s', ifelse(quick.plot, ' (based on a sample size of 1000)', '')))
set.caption(sprintf('Pontdiagram%s', ifelse(quick.plot, ' (n = 1000)', '')))
pairs(vars, lower.panel = 'panel.smooth', upper.panel = 'panel.cor', labels = labels)
pairs(vars, lower.panel = 'panel.smooth', upper.panel = 'panel.cor', labels = labels)
}
}
%>
%>