Έκδοση: 16 / 11 / 2024
Προετοιμασία περιβάλλοντος R
Οι παρακάτω εντολές ελέγχουν αν έχουν εγκατασταθεί τα απαιτούμενα
πακέτα για την εκτέλεση του συνόλου του κώδικα της ενότητας και εγκαθιστούν όσα λείπουν.
# Packages needed to run all the code in this section.
# Each package is listed once (the original list repeated "Hmisc").
list.of.packages <- c(
  "latex2exp", "ppcor", "tseries", "trend", "fitdistrplus", "logspline",
  "gamlss", "gamlss.dist", "gamlss.add", "forecast", "astsa", "fpp", "fpp2",
  "TSA", "tsbox", "KFAS", "dlm", "Hmisc", "tidyverse", "htmlTable", "expss",
  "tibble", "dplyr", "scipub", "stringi", "insight", "multcompView", "grid",
  "gridExtra", "ggfortify", "effects", "desk", "haven", "afex",
  "RColorBrewer", "ggplot2", "plotrix", "hrbrthemes", "viridis", "tidyr",
  "pROC", "corrr", "ggcorrplot", "psych", "labelled", "e1071", "dendextend"
)

# Packages from the list that are not found in any library on this machine.
new.packages <- setdiff(list.of.packages, installed.packages()[, "Package"])

# Install only what is missing; do nothing when everything is already present.
if (length(new.packages) > 0) {
  install.packages(new.packages)
}
Σημαντική Σημείωση
Όλες οι συναρτήσεις που χρησιμοποιούνται βρίσκονται στο αρχείο της
γλώσσας R:
MyRFunctions.R.
Για να γίνουν διαθέσιμες για χρήση αρκεί να φορτωθούν στο περιβάλλον
της R, εκτελώντας την εντολή
source("https://utopia.duth.gr/epdiaman/files/kedivim/MyRFunctions.R")
(Στην περίπτωση που γίνει τοπική λήψη του αρχείου MyRFunctions.R,
πρέπει να αλλάξει ανάλογα και η διεύθυνση του αρχείου στην εντολή source.)
Διαχείριση
δεδομένων
Μετατροπή όλων των
στηλών με λίγες τιμές σε παράγοντες (συνάρτηση my_create_factors)
Παράδειγμα
success = c(1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1)
gender = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0)
my.data.frame = data.frame(success, gender)
summary(my.data.frame)
## success gender
## Min. :0.00 Min. :0.0000
## 1st Qu.:0.75 1st Qu.:0.0000
## Median :1.00 Median :0.0000
## Mean :0.75 Mean :0.4167
## 3rd Qu.:1.00 3rd Qu.:1.0000
## Max. :1.00 Max. :1.0000
my.data.frame.2 = my_create_factors(my.data.frame)
summary(my.data.frame.2)
## success gender
## 0:3 0:7
## 1:9 1:5
Ανάκτηση όλων των
παραγόντων από ένα dataframe (συνάρτηση get_factors)
Παράδειγμα
exam = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
gender = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("Γ", "Α"))
age = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(exam, gender, age)
print(get_factors(my.data.frame))
## [1] "gender"
Ανάκτηση ετικέτας
μεταβλητής (labels) (συναρτήσεις get_label και get_df_labels)
Ανάκτηση μίας ή
περισσοτέρων ετικετών
Παράδειγμα
exam = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
gender = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("Γ", "Α"))
age = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(exam, gender, age)
Hmisc::label(my.data.frame$exam) = 'Βαθμός εξετάσεων'
Hmisc::label(my.data.frame$gender) = 'Φύλο'
print(get_label(my.data.frame[, 'gender']))
## [1] "Φύλο"
print(get_df_labels(my.data.frame, 'gender'))
## [1] "Φύλο"
print(get_df_labels(my.data.frame))
## [1] "Βαθμός εξετάσεων" "Φύλο" "age"
Μετατροπή vector ή
μεμονωμένου string για εμφάνιση σε html μορφή (συνάρτηση
prepare_vector_for_HTML_output)
Παράδειγμα
all.inequalities = c("1 > 0", "3 < 5")
print(prepare_vector_for_HTML_output(all.inequalities))
## [1] "1 > 0" "3 < 5"
Μετατροπή των labels
ενός dataframe για εμφάνιση σε html μορφή (συνάρτηση
prepare_data_for_HTML_output)
Παράδειγμα
exam = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
agecat = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("0 - 35", ">35"))
my.data.frame = data.frame(exam, agecat)
print(my.data.frame$agecat)
## [1] 0 - 35 0 - 35 >35 0 - 35 >35 0 - 35 >35 0 - 35 >35 0 - 35
## [11] >35 0 - 35
## Levels: 0 - 35 >35
# Attach human-readable labels to the columns of this example's data frame.
Hmisc::label(my.data.frame$exam) <- 'Βαθμός εξετάσεων'
Hmisc::label(my.data.frame$agecat) <- 'Ηλικιακή κατηγορία'
# Convert THIS example's data frame (my.data.frame). The original call passed
# my.data.frame.2, a leftover from the my_create_factors example that has no
# `agecat` column, which is why the recorded output of the print below was NULL.
my.data.frame.2 <- prepare_data_for_HTML_output(my.data.frame)
print(my.data.frame.2$agecat)
## NULL
Descriptive
Statistics
Καταμέτρηση
Παρατηρήσεων (συνάρτηση my_case_report)
Παράδειγμα
sample.data = c(1, NA, 0, 4, 6, 1, NA, 1, 1, 0, 1, 1)
my_case_report(sample.data)
## $number.of.NA
## [1] 2
##
## $valid.cases
## [1] 10
##
## $total.cases
## [1] 12
##
## $report.str
## [1] "12 (2 NA, 10 valid)"
Πίνακας συχνοτήτων
(συνάρτηση fre)
library(expss)
colors1 = c("Κόκκινο", "Μπλε", "Πράσινο", "Κόκκινο", "Μπλε", "Κόκκινο")
htmlTable(fre(colors1))
colors1
|
Count
|
Valid percent
|
Percent
|
Responses, %
|
Cumulative responses, %
|
Κόκκινο
|
3
|
50.0
|
50.0
|
50.0
|
50.0
|
Μπλε
|
2
|
33.3
|
33.3
|
33.3
|
83.3
|
Πράσινο
|
1
|
16.7
|
16.7
|
16.7
|
100.0
|
#Total
|
6
|
100
|
100
|
100
|
|
<NA>
|
0
|
|
0.0
|
|
|
Πίνακας συχνοτήτων
πολλών μεταβλητών (συνάρτηση my_frequency_table)
Παράδειγμα
success = factor(c(1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1), levels = c(0, 1), labels = c("Αποτυχία", "Επιτυχία"))
gender = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας"))
my.data.frame = data.frame(success, gender)
Hmisc::label(my.data.frame$success) = 'Αποτέλεσμα'
Hmisc::label(my.data.frame$gender) = 'Φύλο'
my_frequency_table(my.data.frame, c('success', 'gender'))
Αποτέλεσμα (success)
|
Value
|
Count
|
Valid percent
|
Percent
|
Responses, %
|
Cumulative responses, %
|
Αποτυχία
|
3
|
25
|
25
|
25
|
25
|
Επιτυχία
|
9
|
75
|
75
|
75
|
100
|
#Total
|
12
|
100
|
100
|
100
|
|
<NA>
|
0
|
|
0
|
|
|
Φύλο (gender)
|
Value
|
Count
|
Valid percent
|
Percent
|
Responses, %
|
Cumulative responses, %
|
Γυναίκα
|
7
|
58.3
|
58.3
|
58.3
|
58.3
|
Άνδρας
|
5
|
41.7
|
41.7
|
41.7
|
100.0
|
#Total
|
12
|
100
|
100
|
100
|
|
<NA>
|
0
|
|
0.0
|
|
|
Πίνακας συμπτώσεων 2
ή περισσοτέρων μεταβλητών (συνάρτηση my_contigency_table)
Παράδειγμα
success = factor(c(1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1), levels = c(0, 1), labels = c("Αποτυχία", "Επιτυχία"))
gender = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας"))
residence = factor(c(1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1), levels = c(0, 1), labels = c("Μικρή πόλη", "Μεγάλη πόλη"))
my.data.frame = data.frame(success, gender, residence)
my_contigency_table(my.data.frame, c('gender', 'success'))
Cross tabulation of gender, success
|
|
success
|
|
|
|
Αποτυχία
|
Επιτυχία
|
|
Sum
|
gender
|
Γυναίκα
|
1
|
6
|
|
7
|
Άνδρας
|
2
|
3
|
|
5
|
Sum
|
3
|
9
|
|
12
|
my_contigency_table(my.data.frame, c('residence', 'success', 'gender'))
Cross tabulation of residence, success, gender
|
|
success_gender
|
|
|
|
Αποτυχία_Γυναίκα
|
Αποτυχία_Άνδρας
|
Επιτυχία_Γυναίκα
|
Επιτυχία_Άνδρας
|
|
Sum
|
residence
|
Μικρή πόλη
|
0
|
0
|
2
|
1
|
|
3
|
Μεγάλη πόλη
|
1
|
2
|
4
|
2
|
|
9
|
Sum
|
1
|
2
|
6
|
3
|
|
12
|
my_contigency_table(my.data.frame, c('residence', 'success', 'gender'), row.vars = c(1, 3))
Cross tabulation of residence, success, gender
|
|
success
|
|
|
|
Αποτυχία
|
Επιτυχία
|
|
Sum
|
residence_gender
|
Μικρή πόλη_Γυναίκα
|
0
|
2
|
|
2
|
Μικρή πόλη_Άνδρας
|
0
|
1
|
|
1
|
Μεγάλη πόλη_Γυναίκα
|
1
|
4
|
|
5
|
Μεγάλη πόλη_Άνδρας
|
2
|
2
|
|
4
|
Sum
|
3
|
9
|
|
12
|
Mode (συνάρτηση
smode)
# Find the mode(s) of a vector, i.e. its most frequent value(s).
#
# Source: https://stat.ethz.ch/pipermail/r-help/2011-March/273569.html
#
# Args:
#   x: a vector whose values are tabulated with table().
#
# Returns a list with two elements:
#   themodes: the most frequent value(s), as character strings (they are the
#             names of the frequency table, so numeric input comes back as text).
#   modeval:  how many times a mode occurs (numeric).
smode <- function(x) {
  counts <- table(x)
  top_count <- max(counts)
  # Keep every value that reaches the highest frequency (there may be ties).
  winners <- counts[top_count == counts]
  list(
    themodes = names(winners),
    # All tied modes share the same count, so the first entry is enough.
    modeval = as.numeric(winners[1])
  )
}
Παράδειγμα
score = c(12, 14, 17, 13, 19, 28, 20, 9, 3, 6, 5, 11, 12, 17, 16, 8, 6, 2)
smode(score)
## $themodes
## [1] "6" "12" "17"
##
## $modeval
## [1] 2
Mean & SD
(συνάρτηση my_mean_sd)
Παράδειγμα
score = c(12, 14, 17, 13, 19, 28, 20, 9, 3, 6, 5, 11, 12, 17, 16, 8, 6, 2)
my_mean_sd(score)
## [1] "M = 12.1 (6.72), 95% C.I. 9.01 - 15.2"
Explore (συνάρτηση
my_explore_vars)
Παράδειγμα
exam = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
gender = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("Γ", "Α"))
age = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(exam, gender, age)
Hmisc::label(my.data.frame$exam) = 'Βαθμός εξετάσεων'
Hmisc::label(my.data.frame$gender) = 'Φύλο'
Hmisc::label(my.data.frame$age) = 'Ηλικία'
my_explore_vars(my.data.frame, c("exam", "age"))
Descriptive statistics of Βαθμός εξετάσεων, Ηλικία
|
|
Variable
|
N
|
Missing
|
Valid
|
M (SD)
|
CI95
|
1
|
Βαθμός εξετάσεων
|
12
|
0
|
12
|
66.7 (21.7)
|
52.9 - 80.4
|
2
|
Ηλικία
|
12
|
0
|
12
|
23.7 (6.75)
|
19.4 - 28
|
Explore by group
(συνάρτηση my_explore_vars_by_group)
Παράδειγμα
examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
genderdata = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0)
genderdata = factor(genderdata, levels = c(0, 1), labels = c("Γ", "Α"))
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(Βαθμός = examdata, Φύλο = genderdata, Ηλικία = agedata)
my_explore_vars_by_group(my.data.frame, 'Βαθμός', 'Φύλο', type = 'all')
Descriptive Statistics of Βαθμός among levels of Φύλο
|
|
Group
|
N
|
M (SD)
|
95% C.I.
|
Median
|
Interquartile Range
|
|
Α
|
5
|
47 (12)
|
32 - 62
|
50
|
15.0
|
|
Γ
|
7
|
80.7 (14.6)
|
67.3 - 94.2
|
80
|
25.0
|
|
Total
|
12
|
66.7 (21.7)
|
52.9 - 80.4
|
65
|
28.8
|
Tukeys HSD Post Hoc Test for variable: Βαθμός The
Tukey HSD is not a Post Hoc Test per se. It may provide valuable results
as an independent test as well.
|
|
Difference
|
Lower
|
Upper
|
p
|
Γ-Α
|
33.714
|
15.962
|
51.466
|
0.002
|
Έλεγχος κανονικότητας
(συνάρτηση my_check_normality_of_vector)
Παράδειγμα
score = c(12, 14, 17, 13, 19, 28, 20, 9, 3, 6, 5, 11, 12, 17, 16, 8, 6, 2)
my_check_normality_of_vector(score)
Normality statistics
|
|
Statistic
|
Description
|
Skewness & Kurtosis
|
Skewness
|
0.483
|
Normal distribution has skew = 0. For a unimodal distribution,
negative skew commonly indicates that the tail is on the left side of
the distribution, and positive skew indicates that the tail is on the
right.
|
Kurtosis
|
2.88
|
Normal distribution has kurtosis = 3. Values over 3 indicate a
leptokurtic distribution and values less than 3 indicate a platykurtic
distribution.
|
Normality Tests. Η0: This sample is from a normal
distribution vs Η1: Not the Η0.
|
Shapiro–Wilk
|
W = 0.968, p = 0.753
|
This test is more appropriate method for small sample sizes
(<50 samples) although it can also be handling on larger sample
size.
|
Lilliefors
|
D = 0.096, p = 0.931
|
The Lilliefors test uses the same calculations as the
Kolmogorov-Smirnov test, but it is more conservative in the sense that
the Lilliefors Test is less likely to show that data is normally
distributed.
|
Έλεγχος κανονικότητας
ανά υποομάδα (συνάρτηση my_check_normality_of_column_by_group)
Παράδειγμα
examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
genderdata = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0)
genderdata = factor(genderdata, levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας"))
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(Βαθμός = examdata, Φύλο = genderdata, Ηλικία = agedata)
my_check_normality_of_column_by_group(my.data.frame$Ηλικία, my.data.frame$Φύλο)
Γυναίκα
Normality statistics
|
|
Statistic
|
Description
|
Skewness & Kurtosis
|
Skewness
|
-0.473
|
Normal distribution has skew = 0. For a unimodal distribution,
negative skew commonly indicates that the tail is on the left side of
the distribution, and positive skew indicates that the tail is on the
right.
|
Kurtosis
|
2.675
|
Normal distribution has kurtosis = 3. Values over 3 indicate a
leptokurtic distribution and values less than 3 indicate a platykurtic
distribution.
|
Normality Tests. Η0: This sample is from a normal
distribution vs Η1: Not the Η0.
|
Shapiro–Wilk
|
W = 0.959, p = 0.811
|
This test is more appropriate method for small sample sizes
(<50 samples) although it can also be handling on larger sample
size.
|
Lilliefors
|
D = 0.214, p = 0.428
|
The Lilliefors test uses the same calculations as the
Kolmogorov-Smirnov test, but it is more conservative in the sense that
the Lilliefors Test is less likely to show that data is normally
distributed.
|
Άνδρας
Normality statistics
|
|
Statistic
|
Description
|
Skewness & Kurtosis
|
Skewness
|
0.898
|
Normal distribution has skew = 0. For a unimodal distribution,
negative skew commonly indicates that the tail is on the left side of
the distribution, and positive skew indicates that the tail is on the
right.
|
Kurtosis
|
2.505
|
Normal distribution has kurtosis = 3. Values over 3 indicate a
leptokurtic distribution and values less than 3 indicate a platykurtic
distribution.
|
Normality Tests. Η0: This sample is from a normal
distribution vs Η1: Not the Η0.
|
Shapiro–Wilk
|
W = 0.903, p = 0.427
|
This test is more appropriate method for small sample sizes
(<50 samples) although it can also be handling on larger sample
size.
|
Lilliefors
|
D = 0.241, p = 0.441
|
The Lilliefors test uses the same calculations as the
Kolmogorov-Smirnov test, but it is more conservative in the sense that
the Lilliefors Test is less likely to show that data is normally
distributed.
|
Έλεγχος ομοιογένειας
(ίση διακύμανση μίας μεταβλητής μεταξύ των επιπέδων ενός παράγοντα)
(συνάρτηση my_check_homogeneity_of_column)
Παράδειγμα
examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
genderdata = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0)
genderdata = factor(genderdata, levels = c(0, 1), labels = c("Γ", "Α"))
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(examdata, genderdata, agedata)
library(Hmisc)
var.labels = c(examdata = "Βαθμός Εξέτασης", agedata = 'Ηλικία', genderdata = 'Φύλο')
label(my.data.frame) = as.list(var.labels[match(names(my.data.frame), names(var.labels))])
my_check_homogeneity_of_column(my.data.frame, 'examdata', 'genderdata')
Standard Deviation of Βαθμός Εξέτασης among levels of Φύλο
|
Level
|
Γ
|
Α
|
Group Size
|
7
|
5
|
SD
|
14.6
|
12.0
|
Levene test of variance equality (homogeneity) of Βαθμός Εξέτασης
over Φύλο
|
F(1, 10) = 0.622, p = 0.448. Homogeneity hypothesis is confirmed.
|
Bartlett’s test of variance equality (homogeneity) of Βαθμός Εξέτασης
over Φύλο If you have strong evidence that your data do in
fact come from a normal, or nearly normal, distribution, then Bartlett’s
test has better performance.
|
c2(1) = 0.152, p = 0.697. Homogeneity hypothesis is
confirmed.
|
Correlation (συναρτήσεις
my_cor_table, my_cor_table_long και my_cor_plot)
Παράδειγμα
exam = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
test = c(55, 70, 68, 50, 53, 87, 45, 92, 56, 99, 35, 99)
gender = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας"))
age = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(exam, test, gender, age)
Hmisc::label(my.data.frame$exam) = 'Βαθμός εξετάσεων'
Hmisc::label(my.data.frame$test) = 'Βαθμός προόδου'
Hmisc::label(my.data.frame$gender) = 'Φύλο'
Hmisc::label(my.data.frame$age) = 'Ηλικία'
# Πίνακες
my_cor_table(my.data.frame, c('exam', 'test', 'age'))
Pearson Correlation Coefficients (*:p<.05, **:p<.01,
***:p<.001)
|
|
Βαθμός εξετάσεων
|
Βαθμός προόδου
|
Ηλικία
|
Βαθμός εξετάσεων
|
-
|
.929***
|
.859***
|
Βαθμός προόδου
|
.929***
|
-
|
.841***
|
Ηλικία
|
.859***
|
.841***
|
-
|
# Πίνακες 2: Δείξε μόνο τις συσχετίσεις που είναι μεγαλύτερες από 0,9
my_cor_table_long(my.data.frame, c('exam', 'test', 'age'), dontshowbelow = 0.9)
Correlations
|
Βαθμός εξετάσεων
|
Βαθμός προόδου
|
Ηλικία
|
Βαθμός εξετάσεων
|
1
|
0.929
|
|
Βαθμός προόδου
|
0.929
|
1
|
|
Ηλικία
|
|
|
1
|
Observations
|
Βαθμός εξετάσεων
|
Βαθμός προόδου
|
Ηλικία
|
Βαθμός εξετάσεων
|
12
|
12
|
12
|
Βαθμός προόδου
|
12
|
12
|
12
|
Ηλικία
|
12
|
12
|
12
|
Significances
|
Βαθμός εξετάσεων
|
Βαθμός προόδου
|
Ηλικία
|
Βαθμός εξετάσεων
|
|
0
|
0
|
Βαθμός προόδου
|
0
|
|
0.001
|
Ηλικία
|
0
|
0.001
|
|
# Πίνακες 3: Δείξε μόνο τις συσχετίσεις που είναι στατιστικώς σημαντικές
my_cor_table_long(my.data.frame, c('exam', 'test', 'age'), show.only.significant = TRUE)
Correlations
|
Βαθμός εξετάσεων
|
Βαθμός προόδου
|
Ηλικία
|
Βαθμός εξετάσεων
|
1
|
0.929
|
0.859
|
Βαθμός προόδου
|
0.929
|
1
|
0.841
|
Ηλικία
|
0.859
|
0.841
|
1
|
Observations
|
Βαθμός εξετάσεων
|
Βαθμός προόδου
|
Ηλικία
|
Βαθμός εξετάσεων
|
12
|
12
|
12
|
Βαθμός προόδου
|
12
|
12
|
12
|
Ηλικία
|
12
|
12
|
12
|
Significances
|
Βαθμός εξετάσεων
|
Βαθμός προόδου
|
Ηλικία
|
Βαθμός εξετάσεων
|
|
0
|
0
|
Βαθμός προόδου
|
0
|
|
0.001
|
Ηλικία
|
0
|
0.001
|
|
# Διάγραμμα
my_cor_plot(my.data.frame, c('exam', 'test', 'age'))
Correlation ανά
υποομάδα με υπολογισμό p value και 95% διάστημα εμπιστοσύνης (συνάρτηση
my_cortable_by_factor_with_p)
Παράδειγμα
examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
testdata = c(55, 70, 68, 50, 53, 87, 45, 92, 56, 99, 35, 99)
genderdata = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0)
genderdata = factor(genderdata, levels = c(0, 1), labels = c("Γ", "Α"))
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(Εξετάσεις = examdata, Πρόοδος = testdata, Φύλο = genderdata, Ηλικία = agedata)
my_cortable_by_factor_with_p(my.data.frame, c('Εξετάσεις', 'Πρόοδος', 'Ηλικία'), afactor = 'Φύλο')
Correlation of Εξετάσεις, Πρόοδος, Ηλικία across levels of Φύλο
|
|
Var1
|
Var2
|
Group1
|
Group2
|
Corr1
|
N1
|
Corr2
|
N2
|
p.value
|
lowerCI
|
upperCI
|
1
|
Εξετάσεις
|
Πρόοδος
|
Γ
|
Α
|
0.901
|
7
|
0.952
|
5
|
0.662
|
-0.495
|
0.47
|
2
|
Εξετάσεις
|
Ηλικία
|
Γ
|
Α
|
0.55
|
7
|
0.876
|
5
|
0.393
|
-1.23
|
0.651
|
3
|
Πρόοδος
|
Ηλικία
|
Γ
|
Α
|
0.665
|
7
|
0.928
|
5
|
0.33
|
-1.107
|
0.467
|
Μερική συσχέτιση
(Partial Correlation) (συνάρτηση pcor.test)
examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
testdata = c(55, 70, 68, 50, 53, 87, 45, 92, 56, 99, 35, 99)
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
library(ppcor)
ppcor::pcor.test(testdata, examdata, agedata)
## estimate p.value statistic n gp Method
## 1 0.7476411 0.008160923 3.377373 12 1 pearson
cor(testdata, examdata)
## [1] 0.929472
Πολλαπλή δοκιμασία χι -
τετράγωνο (συνάρτηση my_chi_square)
Παράδειγμα 1: Εφαρμογή σε πίνακα συχνοτήτων
the.table = matrix(c(10, 11, 5, 8, 8, 6, 8, 6, 15, 25), 2, 5, byrow=TRUE)
rownames(the.table) = c('Αποτυχία', 'Επιτυχία')
colnames(the.table) = c('Πολύ αρνητική', 'Αρνητική', 'Ουδέτερη', 'Θετική', 'Πολύ θετική')
my_chi_square(the.table)
Test Var1 - Var2.
x2(4) = 9.57, p =
0.048. Result: DEPENDEND VARIABLES.
Observed Frequencies between combinations of Var1 -
Var2 The statistic x2 reflects the overall
difference between observed and expected frequencies.
|
|
Πολύ αρνητική
|
Αρνητική
|
Ουδέτερη
|
Θετική
|
Πολύ θετική
|
Sum
|
Αποτυχία
|
10
|
11
|
5
|
8
|
8
|
42
|
Επιτυχία
|
6
|
8
|
6
|
15
|
25
|
60
|
Sum
|
16
|
19
|
11
|
23
|
33
|
102
|
Expected Frequencies Less Than 5: 1
(10%) According to Moore & McCabe, no more than 20%
of the expected counts should be less than 5. Some expected counts can
be <5, provided none <1, and 80% of the expected counts should be
equal to or greater than 5.
|
|
Πολύ αρνητική
|
Αρνητική
|
Ουδέτερη
|
Θετική
|
Πολύ θετική
|
Sum
|
Αποτυχία
|
6.6
|
7.8
|
4.5
|
9.5
|
13.6
|
42
|
Επιτυχία
|
9.4
|
11.2
|
6.5
|
13.5
|
19.4
|
60
|
Sum
|
16
|
19
|
11
|
23
|
33
|
102
|
Παράδειγμα 2: Εφαρμογή σε μεταβλητές dataframe
gender = factor(c(1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1), levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας"))
agecat = factor(c(1, 2, 3, 3, 3, 2, 2, 2, 1, 1, 1, 2), levels = c(1, 2, 3), labels = c("15 - 24", "25 - 34", ">34"))
success = factor(c(0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0), levels = c(0, 1), labels = c("Αποτυχία", "Επιτυχία"))
my.data.frame = data.frame(gender, agecat, success)
Hmisc::label(my.data.frame$success) = 'Επιτυχία στις εξετάσεις'
Hmisc::label(my.data.frame$gender) = 'Φύλο'
Hmisc::label(my.data.frame$agecat) = 'Ηλικιακή κατηγορία'
my_chi_square(my.data.frame, c('gender', 'agecat', 'success'))
Test Φύλο - Ηλικιακή κατηγορία.
x2(2) = 4.32, p
= 0.115. Result: Independent Variables.
Observed Frequencies between combinations of Φύλο - Ηλικιακή
κατηγορία The statistic x2 reflects the
overall difference between observed and expected frequencies.
|
|
15 - 24
|
25 - 34
|
>34
|
Sum
|
Γυναίκα
|
0
|
3
|
2
|
5
|
Άνδρας
|
4
|
2
|
1
|
7
|
Sum
|
4
|
5
|
3
|
12
|
Expected Frequencies Less Than 5: 6
(100%) According to Moore & McCabe, no more than
20% of the expected counts should be less than 5. Some expected counts
can be <5, provided none <1, and 80% of the expected counts should
be equal to or greater than 5.
|
|
15 - 24
|
25 - 34
|
>34
|
Sum
|
Γυναίκα
|
1.7
|
2.1
|
1.2
|
5
|
Άνδρας
|
2.3
|
2.9
|
1.8
|
7
|
Sum
|
4
|
5
|
3
|
12
|
Test Φύλο - Επιτυχία στις εξετάσεις.
x2(1) =
3.09, p = 0.079. Result: Independent Variables.
Observed Frequencies between combinations of Φύλο - Επιτυχία στις
εξετάσεις The statistic x2 reflects the
overall difference between observed and expected frequencies.
|
|
Αποτυχία
|
Επιτυχία
|
Sum
|
Γυναίκα
|
4
|
1
|
5
|
Άνδρας
|
2
|
5
|
7
|
Sum
|
6
|
6
|
12
|
Expected Frequencies Less Than 5: 4
(100%) According to Moore & McCabe, no more than
20% of the expected counts should be less than 5. Some expected counts
can be <5, provided none <1, and 80% of the expected counts should
be equal to or greater than 5.
|
|
Αποτυχία
|
Επιτυχία
|
Sum
|
Γυναίκα
|
2.5
|
2.5
|
5
|
Άνδρας
|
3.5
|
3.5
|
7
|
Sum
|
6
|
6
|
12
|
Test Ηλικιακή κατηγορία - Επιτυχία στις εξετάσεις.
x2(2) = 3.13, p = 0.209. Result:
Independent Variables.
Observed Frequencies between combinations of Ηλικιακή κατηγορία -
Επιτυχία στις εξετάσεις The statistic x2
reflects the overall difference between observed and expected
frequencies.
|
|
Αποτυχία
|
Επιτυχία
|
Sum
|
15 - 24
|
1
|
3
|
4
|
25 - 34
|
4
|
1
|
5
|
>34
|
1
|
2
|
3
|
Sum
|
6
|
6
|
12
|
Expected Frequencies Less Than 5: 6
(100%) According to Moore & McCabe, no more than
20% of the expected counts should be less than 5. Some expected counts
can be <5, provided none <1, and 80% of the expected counts should
be equal to or greater than 5.
|
|
Αποτυχία
|
Επιτυχία
|
Sum
|
15 - 24
|
2
|
2
|
4
|
25 - 34
|
2.5
|
2.5
|
5
|
>34
|
1.5
|
1.5
|
3
|
Sum
|
6
|
6
|
12
|
Δοκιμασία t - test
Δοκιμασία για ένα
δείγμα (One sample t - test) (συνάρτηση my_t_test_one_sample)
Παράδειγμα 1: Εφαρμογή σε vector
one.sample.data = c(490, 503, 499, 492, 500, 501, 489, 478, 498, 508)
my_t_test_one_sample(one.sample.data, mu = 500)
M(SD) (496 ± 8.64), N = 10. H0: μ = 500 vs H1: μ ≠
500. H0 is not rejected (t(9) = 1.538, p = 0.159).
|
Παράδειγμα 2: Εφαρμογή σε dataframe
examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
testdata = c(55, 70, 68, 50, 53, 87, 45, 92, 56, 99, 35, 99)
my.data.frame = data.frame(Εξετάσεις = examdata, Πρόοδος = testdata)
my_t_test_one_sample(my.data.frame, var = 'Εξετάσεις', mu = 60)
Εξετάσεις (66.7 ± 21.7), N = 12. H0: μ = 60 vs H1:
μ ≠ 60. H0 is not rejected (t(11) = 1.066, p =
0.309).
|
my_t_test_one_sample(my.data.frame, var = 'Πρόοδος', mu = 60)
Πρόοδος (67.4 ± 22), N = 12. H0: μ = 60 vs H1: μ ≠
60. H0 is not rejected (t(11) = 1.166, p = 0.268).
|
Δοκιμασία δύο
ανεξάρτητων δειγμάτων (Independent samples t - test) (συνάρτηση
my_t_test_independent_samples)
Παράδειγμα
examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
testdata = c(55, 70, 68, 50, 53, 87, 45, 92, 56, 99, 35, 99)
genderdata = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("Γ", "Α"))
locationdata = factor(c(1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0), levels = c(0, 1), labels = c("Χωριό/Κωμόπολη", "Πόλη"))
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(Εξετάσεις = examdata, Πρόοδος = testdata, Φύλο = genderdata, Ηλικία = agedata, Τοποθεσία = locationdata)
my_t_test_independent_samples(my.data.frame, dependent.vars = c('Εξετάσεις', 'Πρόοδος', 'Ηλικία'), group.var = c('Φύλο', 'Τοποθεσία'), print.also.in.console = FALSE)
Εξετάσεις over Φύλο. $H_0: \mu_{\text{Γ}} = \mu_{\text{Α}}$ vs $H_1: \mu_{\text{Γ}} \neq \mu_{\text{Α}}$.
$H_0$ is rejected. Group Γ (N = 7): 80.7 ± 14.6 vs Group Α (N = 5): 47 ± 12,
t(10) = 4.232, p = 0.002.
Equality of variances: F(6, 4) = 1.461, p = 0.744.
Πρόοδος over Φύλο. $H_0: \mu_{\text{Γ}} = \mu_{\text{Α}}$ vs $H_1: \mu_{\text{Γ}} \neq \mu_{\text{Α}}$.
$H_0$ is rejected. Group Γ (N = 7): 78.9 ± 20.5 vs Group Α (N = 5): 51.4 ± 12.3,
t(10) = 2.646, p = 0.024.
Equality of variances: F(6, 4) = 2.77, p = 0.343.
Ηλικία over Φύλο. $H_0: \mu_{\text{Γ}} = \mu_{\text{Α}}$ vs $H_1: \mu_{\text{Γ}} \neq \mu_{\text{Α}}$.
$H_0$ is rejected. Group Γ (N = 7): 28 ± 5.1 vs Group Α (N = 5): 17.6 ± 2.7,
t(10) = 4.127, p = 0.002.
Equality of variances: F(6, 4) = 3.562, p = 0.239.
Εξετάσεις over Τοποθεσία. $H_0: \mu_{\text{Πόλη}} = \mu_{\text{Χωριό/Κωμόπολη}}$ vs $H_1: \mu_{\text{Πόλη}} \neq \mu_{\text{Χωριό/Κωμόπολη}}$.
$H_0$ is not rejected.
Group Πόλη (N = 6): 61.7 ± 22.9 vs Group Χωριό/Κωμόπολη (N = 6): 71.7 ± 21.1,
t(10) = 0.785, p = 0.451.
Equality of variances: F(5, 5) = 1.179, p = 0.861.
Πρόοδος over Τοποθεσία. $H_0: \mu_{\text{Πόλη}} = \mu_{\text{Χωριό/Κωμόπολη}}$ vs $H_1: \mu_{\text{Πόλη}} \neq \mu_{\text{Χωριό/Κωμόπολη}}$.
$H_0$ is not rejected.
Group Πόλη (N = 6): 63.8 ± 21.3 vs Group Χωριό/Κωμόπολη (N = 6): 71 ± 24.2,
t(10) = 0.545, p = 0.598.
Equality of variances: F(5, 5) = 0.774, p = 0.786.
Ηλικία over Τοποθεσία. $H_0: \mu_{\text{Πόλη}} = \mu_{\text{Χωριό/Κωμόπολη}}$ vs $H_1: \mu_{\text{Πόλη}} \neq \mu_{\text{Χωριό/Κωμόπολη}}$.
$H_0$ is not rejected.
Group Πόλη (N = 6): 24.3 ± 7.69 vs Group Χωριό/Κωμόπολη (N = 6): 23 ± 6.32,
t(10) = 0.328, p = 0.75.
Equality of variances: F(5, 5) = 1.477, p = 0.679.
Δοκιμασία για
ζευγαρωτές παρατηρήσεις (Paired samples t - test) (συνάρτηση
my_t_test_paired_samples)
Παράδειγμα
weight1 = c(81.2, 76.7, 75.7, 81.2, 71.7, 71.2, 68.5, 89.8, 107.5, 105.7, 99.3, 100.7, 90.3, 105.7, 98.0, 116.6)
weight2 = c(78.0, 73.0, 73.0, 78.5, 69.9, 64.9, 63.5, 87.1, 102.1, 102.5, 97.1, 95.3, 87.5, 102.5, 93.4, 112.9)
weight3 = c(78.4, 72.1, 73.7, 78.5, 69.7, 65.4, 63.3, 85.5, 101.3, 102.2, 95.7, 93.9, 86.5, 102.5, 93.9, 113.9)
my.data.frame = data.frame(weight1, weight2, weight3)
Hmisc::label(my.data.frame$weight1) = '1η Μέτρηση'
Hmisc::label(my.data.frame$weight2) = '2η Μέτρηση'
Hmisc::label(my.data.frame$weight3) = '3η Μέτρηση'
my_t_test_paired_samples(my.data.frame, c('weight1', 'weight2', 'weight3'))
1η Μέτρηση (90 ± 15.2) vs 2η Μέτρηση (86.3 ± 15.2), N = 16 pairs.
$H_0: \mu_{\text{1η Μέτρηση}} = \mu_{\text{2η Μέτρηση}}$ vs $H_1: \mu_{\text{1η Μέτρηση}} \neq \mu_{\text{2η Μέτρηση}}$.
$H_0$ is rejected (t(15) = 11.2, p < 0.001).
1η Μέτρηση (90 ± 15.2) vs 3η Μέτρηση (86 ± 15.1), N = 16 pairs.
$H_0: \mu_{\text{1η Μέτρηση}} = \mu_{\text{3η Μέτρηση}}$ vs $H_1: \mu_{\text{1η Μέτρηση}} \neq \mu_{\text{3η Μέτρηση}}$.
$H_0$ is rejected (t(15) = 10.9, p < 0.001).
2η Μέτρηση (86.3 ± 15.2) vs 3η Μέτρηση (86 ± 15.1), N = 16 pairs.
$H_0: \mu_{\text{2η Μέτρηση}} = \mu_{\text{3η Μέτρηση}}$ vs $H_1: \mu_{\text{2η Μέτρηση}} \neq \mu_{\text{3η Μέτρηση}}$.
$H_0$ is not rejected (t(15) = 1.45, p = 0.168).
Γραμμική παλινδρόμηση
(Linear Regression) (συνάρτηση my_lm)
Παράδειγμα
test = c(5.5, 6, 7, 5, 8, 2, 9, 10, 2, 3, 4, 6.5, 8.5, 1)
exam = c(6.5, 6, 8, 7.5, 7, 4, 8, 10, 1, 5, 5, 6, 9, 5)
my.df = data.frame(test, exam)
Hmisc::label(my.df$test) = 'Πρόοδος'
Hmisc::label(my.df$exam) = 'Τελικές εξετάσεις'
my.model = my_lm(my.df, exam ~ test)
my.model$htmlreport
Linear regression results for Τελικές εξετάσεις Model:
exam ~ const + test
|
|
|
|
95% C.I.
|
|
B
|
SE
|
t
|
p
|
|
Lower
|
Upper
|
Constant
|
2.501
|
0.747
|
3.348
|
0.006
|
|
0.873
|
4.129
|
Πρόοδος
|
0.684
|
0.121
|
5.651
|
<0.001
|
|
0.42
|
0.947
|
Coefficient of determination
|
R2 = 0.727
|
Homoscedasticity Breusch–Pagan test
|
x2(1) = 2.905, p = 0.088. Homoscedasticity assumption is not
rejected. Model seems to be valid.
|
Normality tests of models’ residuals
|
|
Statistic
|
Description
|
Skewness & Kurtosis
|
Skewness
|
-0.704
|
Normal distribution has skew = 0. For a unimodal distribution,
negative skew commonly indicates that the tail is on the left side of
the distribution, and positive skew indicates that the tail is on the
right.
|
Kurtosis
|
3.619
|
Normal distribution has kurtosis = 3. Values over 3 indicate a
leptokurtic distribution and values less than 3 indicate a platykurtic
distribution.
|
Normality Tests. Η0: This sample is from a normal
distribution vs Η1: Not the Η0.
|
Shapiro–Wilk
|
W = 0.943, p = 0.455
|
This test is more appropriate method for small sample sizes
(<50 samples) although it can also be handling on larger sample
size.
|
Lilliefors
|
D = 0.136, p = 0.69
|
The Lilliefors test uses the same calculations as the
Kolmogorov-Smirnov test, but it is more conservative in the sense that
the Lilliefors Test is less likely to show that data is normally
distributed.
|
ANOVA (συνάρτηση
my_ANOVA)
Παράδειγμα
examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
genderdata = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0)
genderdata = factor(genderdata, levels = c(0, 1), labels = c("Γ", "Α"))
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(examdata, genderdata, agedata)
library(Hmisc)
var.labels = c(examdata = "Βαθμός Εξέτασης", agedata = 'Ηλικία', genderdata = 'Φύλο')
label(my.data.frame) = as.list(var.labels[match(names(my.data.frame), names(var.labels))])
# Εκτέλεση με δήλωση όλων των μεταβλητών για post - hoc έλεγχο
all.ANOVA.output = my_ANOVA(data = my.data.frame, model = examdata ~ agedata + genderdata)
all.ANOVA.output$htmlTable
ANOVA Results for Βαθμός Εξέτασης
|
|
SS
|
df
|
F
|
p
|
(Intercept)
|
159.721
|
1
|
1.244
|
0.294
|
agedata
|
695.902
|
1
|
5.42
|
0.045
|
genderdata
|
198.218
|
1
|
1.544
|
0.245
|
Residuals
|
1155.527
|
9
|
|
|
LSD Test for variable: Φύλο Fisher’s LSD is a series of
pairwise t-tests, with each test using the mean squared error from the
significant ANOVA as its pooled variance estimate (and naturally taking
the associated degrees of freedom). It is a valid test, in case where
the ANOVA is significant.
|
|
N
|
Βαθμός Εξέτασης
|
Groups
|
Α
|
5
|
47
|
b
|
Γ
|
7
|
80.714
|
a
|
Tukeys HSD Post Hoc Test for variable: Φύλο The Tukey
HSD is not a Post Hoc Test per se. It may provide valuable results even
if ANOVA is not significant.
|
|
Difference
|
Lower
|
Upper
|
p
|
Α-Γ
|
-33.714
|
-51.466
|
-15.962
|
0.002
|
Standard Deviation of Βαθμός Εξέτασης among levels of all levels of
genderdata.
|
Level
|
Γ
|
Α
|
Group Size
|
7
|
5
|
SD
|
14.6
|
12.0
|
Levene test of variance equality (homogeneity) of Βαθμός Εξέτασης
over all levels of genderdata.
|
F(1, 10) = 0.622, p = 0.448. Homogeneity hypothesis is confirmed.
|
Bartlett’s test of variance equality (homogeneity) of Βαθμός Εξέτασης
over all levels of genderdata. If you have strong
evidence that your data do in fact come from a normal, or nearly normal,
distribution, then Bartlett’s test has better performance.
|
c2(1) = 0.152, p = 0.697. Homogeneity hypothesis is
confirmed.
|
Pearson Correlation Coefficients (*:p<.05, **:p<.01,
***:p<.001)
|
|
Βαθμός Εξέτασης
|
Ηλικία
|
Βαθμός Εξέτασης
|
-
|
.859***
|
Ηλικία
|
.859***
|
-
|
Normality test for model residuals. If the main goal of
an ANOVA is to see whether or not certain effects are significant, then
the assumption of normality of the residuals is only required for small
samples, thanks to the central limit theorem. With sample sizes of a few
hundred participants even extreme violations of the normality
assumptions are unproblematic. So mild violations of this assumptions
are usually no problem with sample sizes exceeding 30.
|
|
Statistic
|
Description
|
Skewness & Kurtosis
|
Skewness
|
-0.564
|
Normal distribution has skew = 0. For a unimodal distribution,
negative skew commonly indicates that the tail is on the left side of
the distribution, and positive skew indicates that the tail is on the
right.
|
Kurtosis
|
2.149
|
Normal distribution has kurtosis = 3. Values over 3 indicate a
leptokurtic distribution and values less than 3 indicate a platykurtic
distribution.
|
Normality Tests. Η0: This sample is from a normal
distribution vs Η1: Not the Η0.
|
Shapiro–Wilk
|
W = 0.898, p = 0.147
|
This test is more appropriate method for small sample sizes
(<50 samples) although it can also be handling on larger sample
size.
|
Lilliefors
|
D = 0.241, p = 0.053
|
The Lilliefors test uses the same calculations as the
Kolmogorov-Smirnov test, but it is more conservative in the sense that
the Lilliefors Test is less likely to show that data is normally
distributed.
|
Repeated Measures
ANOVA (συνάρτηση my_Repeated_ANOVA)
Παράδειγμα
gender = c(0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
gender = factor(gender, levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας"))
weight1 = c(81.2, 76.7, 75.7, 81.2, 71.7, 71.2, 68.5, 89.8, 107.5, 105.7, 99.3, 100.7, 90.3, 105.7, 98.0, 116.6)
weight2 = c(78.0, 73.0, 73.0, 78.5, 69.9, 64.9, 63.5, 87.1, 102.1, 102.5, 97.1, 95.3, 87.5, 102.5, 93.4, 112.9)
weight3 = c(78.4, 72.1, 73.7, 78.5, 69.7, 65.4, 63.3, 85.5, 101.3, 102.2, 95.7, 93.9, 86.5, 102.5, 93.9, 113.9)
dataF = data.frame(gender, weight1, weight2, weight3)
all.necessary.data = my_Repeated_ANOVA(dataF, betweencols = c('gender'), withincols = c('weight1', 'weight2', 'weight3'))
all.necessary.data$anova.table
Repeated ANOVA table: Greenhouse-Geisser correction applied.
|
|
Effect
|
df
|
MSE
|
F
|
p.value
|
1
|
gender
|
1, 14
|
163.04
|
49.36 ***
|
<.001
|
2
|
time
|
1.57, 22.05
|
0.97
|
99.35 ***
|
<.001
|
3
|
gender:time
|
1.57, 22.05
|
0.97
|
0.69
|
.480
|
weight1
Normality statistics
|
|
Statistic
|
Description
|
Skewness & Kurtosis
|
Skewness
|
0.157
|
Normal distribution has skew = 0. For a unimodal distribution,
negative skew commonly indicates that the tail is on the left side of
the distribution, and positive skew indicates that the tail is on the
right.
|
Kurtosis
|
2.832
|
Normal distribution has kurtosis = 3. Values over 3 indicate a
leptokurtic distribution and values less than 3 indicate a platykurtic
distribution.
|
Normality Tests. Η0: This sample is from a normal
distribution vs Η1: Not the Η0.
|
Shapiro–Wilk
|
W = 0.965, p = 0.756
|
This test is a more appropriate method for small sample sizes
(<50 samples), although it can also handle larger sample
sizes.
|
Lilliefors
|
D = 0.131, p = 0.657
|
The Lilliefors test uses the same calculations as the
Kolmogorov-Smirnov test, but it is more conservative in the sense that
the Lilliefors Test is less likely to show that data is normally
distributed.
|
weight2
Normality statistics
|
|
Statistic
|
Description
|
Skewness & Kurtosis
|
Skewness
|
0.211
|
Normal distribution has skew = 0. For a unimodal distribution,
negative skew commonly indicates that the tail is on the left side of
the distribution, and positive skew indicates that the tail is on the
right.
|
Kurtosis
|
2.555
|
Normal distribution has kurtosis = 3. Values over 3 indicate a
leptokurtic distribution and values less than 3 indicate a platykurtic
distribution.
|
Normality Tests. Η0: This sample is from a normal
distribution vs Η1: Not the Η0.
|
Shapiro–Wilk
|
W = 0.967, p = 0.787
|
This test is a more appropriate method for small sample sizes
(<50 samples), although it can also handle larger sample
sizes.
|
Lilliefors
|
D = 0.103, p = 0.919
|
The Lilliefors test uses the same calculations as the
Kolmogorov-Smirnov test, but it is more conservative in the sense that
the Lilliefors Test is less likely to show that data is normally
distributed.
|
weight3
Normality statistics
|
|
Statistic
|
Description
|
Skewness & Kurtosis
|
Skewness
|
0.302
|
Normal distribution has skew = 0. For a unimodal distribution,
negative skew commonly indicates that the tail is on the left side of
the distribution, and positive skew indicates that the tail is on the
right.
|
Kurtosis
|
2.802
|
Normal distribution has kurtosis = 3. Values over 3 indicate a
leptokurtic distribution and values less than 3 indicate a platykurtic
distribution.
|
Normality Tests. Η0: This sample is from a normal
distribution vs Η1: Not the Η0.
|
Shapiro–Wilk
|
W = 0.969, p = 0.821
|
This test is a more appropriate method for small sample sizes
(<50 samples), although it can also handle larger sample
sizes.
|
Lilliefors
|
D = 0.114, p = 0.833
|
The Lilliefors test uses the same calculations as the
Kolmogorov-Smirnov test, but it is more conservative in the sense that
the Lilliefors Test is less likely to show that data is normally
distributed.
|
Mauchly’s Test for Sphericity. Tests the hypothesis that all pairs of
measurements have equal variance in their difference.
|
|
Mauchly’s W
|
p
|
time
|
0.621
|
0.045
|
gender:time
|
0.621
|
0.045
|
Post Hoc Test (LSD,
Tukeys) (συναρτήσεις my_post_hoc_LSD_test, my_post_hoc_Tukeys_test)
Παράδειγμα (LSD)
examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
genderdata = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0)
genderdata = factor(genderdata, levels = c(0, 1), labels = c("Γ", "Α"))
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(Βαθμός = examdata, Φύλο = genderdata, Ηλικία = agedata)
my_post_hoc_LSD_test(data = my.data.frame, model = Βαθμός ~ Ηλικία + Φύλο, variable = 'Φύλο')
## [1] "LSD Test"
## Βαθμός groups
## Γ 80.71429 a
## Α 47.00000 b
Παράδειγμα (Tukey’s 1)
examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
genderdata = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0)
genderdata = factor(genderdata, levels = c(0, 1), labels = c("Γ", "Α"))
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(Βαθμός = examdata, Φύλο = genderdata, Ηλικία = agedata)
my_post_hoc_Tukeys_test(data = my.data.frame, model = Βαθμός ~ Φύλο, variable = 'Φύλο')
## [1] "Tukey Test"
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = model, data = data)
##
## $Φύλο
## diff lwr upr p adj
## Α-Γ -33.71429 -51.46649 -15.96208 0.0017394
Παράδειγμα (Tukey’s 2)
examdata = c(65, 65, 95, 70, 55, 90, 98, 90, 50, 100, 30, 95)
classdata = factor(c(3, 1, 2, 1, 1, 2, 2, 3, 3, 3, 1, 2), labels = c("Α", "Β", "Γ"))
my.data.frame = data.frame(Βαθμός = examdata, Τάξη = classdata)
library(multcompView)
model=lm(my.data.frame$Βαθμός ~ my.data.frame$Τάξη)
TUKEY <- TukeyHSD(x=aov(model))
TK_data<-round(as.data.frame((TUKEY)[1]), 3)
names(TK_data) = c('Difference', 'Lower', 'Upper', 'p')
htmlTable::htmlTable(TK_data)
|
Difference
|
Lower
|
Upper
|
p
|
Β-Α
|
39.5
|
6.257
|
72.743
|
0.022
|
Γ-Α
|
21.25
|
-11.993
|
54.493
|
0.229
|
Γ-Β
|
-18.25
|
-51.493
|
14.993
|
0.322
|
plot(TUKEY , las=2 , col="brown")
Reliability (Alpha,
Omega) (συνάρτηση my_reliability)
Παράδειγμα
sample.data = data.frame(item1 = c(3, 2, 2, 2, 3, 2, 3, 2, 3, 2, 3, 2, 2, 2, 3, 2, 3, 2, 3, 2),
item2 = c(1, 2, 2, 2, 1, 2, 1, 1, 3, 2, 1, 2, 2, 2, 1, 2, 1, 1, 3, 2),
item3 = c(1, 3, 2, 2, 3, 2, 1, 3, 3, 2, 1, 3, 2, 2, 3, 2, 1, 3, 3, 2))
my_reliability(factorname = 'MyFactor', variables = c("item1", "item2", "item3"), data = sample.data)
##
## ΔΕΙΚΤΕΣ ΑΞΙΟΠΙΣΤΙΑΣ
##
## MyFactor
## alpha 0.000
## omega 0.298
## omega2 0.298
## omega3 0.298
## avevar 0.298
Plots
Bar Plot
(Ραβδόγραμμα) (συναρτήσεις my_bar_plot, my_bar_plot2,
my_bar_plot_percent)
Παράδειγμα
outcome = c("Όχι", "Όχι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Ναι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Όχι")
my_bar_plot(outcome)
Παράδειγμα
outcome = c("Όχι", "Όχι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Ναι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Όχι")
my_bar_plot2(outcome)
Παράδειγμα
outcome = c("Όχι", "Όχι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Ναι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Όχι")
my_bar_plot_percent(outcome)
Pie Plot (Κυκλικό
διάγραμμα) (συναρτήσεις my_pie_plot, my_pie_3dplot)
Παράδειγμα
outcome = c("Όχι", "Όχι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Ναι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Όχι")
my_pie_plot(outcome)
Παράδειγμα
outcome = c("Όχι", "Όχι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Ναι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Όχι")
my_pie_3dplot(outcome)
Stack Plot
(Ραβδόγραμμα δύο ποιοτικών μεταβλητών) (συνάρτηση my_stack_plot)
Παράδειγμα 1: Εφαρμογή σε πίνακα
the.table = matrix(c(10, 11, 5, 8, 8, 6, 8, 6, 15, 25), 2, 5, byrow=TRUE)
rownames(the.table) = c('Αποτυχία', 'Επιτυχία')
colnames(the.table) = c('Πολύ αρνητική', 'Αρνητική', 'Ουδέτερη', 'Θετική', 'Πολύ θετική')
my_stack_plot(the.table, type = 3, flip.plot = TRUE)
Παράδειγμα 2: Εφαρμογή σε dataframe
library(Hmisc)
sample.data <- tibble(
success = factor(c(1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1), levels = c(0, 1), labels = c("Αποτυχία", "Επιτυχία")),
gender = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας")))
label(sample.data$success) = 'Αποτέλεσμα'
label(sample.data$gender) = 'Φύλο'
my_stack_plot(sample.data, 'gender', 'success', type = 1)
my_stack_plot(sample.data, 'gender', 'success', type = 2)
my_stack_plot(sample.data, 'gender', 'success', type = 3, flip.plot = TRUE)
Συχνότητες εμφάνισης
τιμών (συνάρτηση my_tab_fun)
Παράδειγμα
outcome = c("Όχι", "Όχι", "Ίσως", "Ναι", "Όχι", "Ναι", "Ναι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Όχι")
gender = c("Αγόρι", "Κορίτσι", "Κορίτσι", "Κορίτσι", "Αγόρι", "Αγόρι", "Αγόρι", "Αγόρι", "Κορίτσι", "Κορίτσι", "Κορίτσι", "Αγόρι")
sample.data = data.frame(gender = gender, outcome = outcome)
my_tab_fun(sample.data)
## Ίσως Ναι Όχι Sum
## gender 6 6 6 18
## outcome 3 5 4 12
Binary Plot
(Ραβδόγραμμα πολλών δίτιμων μεταβλητών) (συνάρτηση
my_binary_vars_plot)
Παράδειγμα
data.for.plot = data.frame(Χαρακτηριστικό1 = c(0, 0, 0, 1, 1, 0), Χαρακτηριστικό2 = c(1, 0, 1, 0, 0, 1), Χαρακτηριστικό3 = c(1, 1, 1, 0, 1, 0))
my_tab_fun(data.for.plot)
## X0 X1 Sum
## Χαρακτηριστικό1 4 2 6
## Χαρακτηριστικό2 3 3 6
## Χαρακτηριστικό3 2 4 6
my_binary_vars_plot(data.for.plot, xlab = 'Χαρακτηριστικά', labels = c('1o', '2o', '3o'))
Ιστόγραμμα
(Histogram) (συνάρτηση my_histFrequency)
Παράδειγμα
sampledata = c(11.8, 3.6, 16.6, 13.5, 4.8, 8.3, 8.9, 9.1, 7.7, 2.3, 12.1, 6.1, 10.2, 8.0, 11.4, 6.8, 9.6, 19.5, 15.3, 12.3, 8.5, 15.9, 18.7, 11.7, 6.2, 11.2, 10.4, 7.2, 5.5, 14.5 )
my_histFrequency(sampledata, xlab = 'Τιμή')
Παράδειγμα
sampledata = rnorm(100, 0, 1)
my_histFrequency_with_normal_curve(sampledata, xlab = 'Τιμή')
Παράδειγμα
sampledata = rnorm(100, 0, 1)
my_histPercent(sampledata, xlab = 'Τιμή')
Παράδειγμα
value = c(rnorm(1000, 0, 1), rnorm(1000, 2.5, 1))
group = c(rep(1, 1000), rep(2, 1000))
sampledata = data.frame(group = group, value = value)
my_hist_multiple(sampledata$value, sampledata$group, xlab = 'Τιμή')
Παράδειγμα
value = c(rnorm(1000, 0, 1), rnorm(1000, 2.5, 1))
group = c(rep(1, 1000), rep(2, 1000))
sampledata = data.frame(group = group, value = value)
my_hist_multiple_density(sampledata$value, sampledata$group, xlab = 'Τιμή')
Παράδειγμα
value = c(rnorm(1000, 0, 1), rnorm(1000, 2.5, 1))
group = c(rep(1, 1000), rep(2, 1000))
sampledata = data.frame(group = group, value = value)
my_hist_multiple_density_sidebyside(sampledata$value, sampledata$group, xlab = 'Τιμή')
Διάγραμμα Μέσων
Τιμών μίας ποσοτικής μεταβλητής
yvalues = rnorm(1000, 0, 1)
afactor = as.factor(rbinom(1000, 3, 0.2))
data = data.frame(yvalues = yvalues, afactor = afactor)
Hmisc::label(yvalues) = 'Y'
Hmisc::label(afactor) = 'Factor'
gplots::plotmeans(yvalues ~ afactor, data = data, xlab=Hmisc::label(afactor), ylab=Hmisc::label(yvalues))
Διάγραμμα
αλληλεπίδρασης δύο παραγόντων πάνω σε μία συνεχή μεταβλητή (Interaction
Plot) (συνάρτηση my.interaction.plot)
# Draws a two-factor interaction plot of a numeric response.
#
# Arguments:
#   column.to.describe  numeric response vector summarised per cell.
#   afactor1            grouping variable placed on the x axis.
#   afactor2            grouping variable drawn as separate trace lines.
#   legend.label        title shown above the trace legend.
#   xlab, ylab, main    axis labels and plot title.
#
# The y axis spans the range of the raw observations rather than only the
# plotted cell summaries. Missing responses are ignored when computing that
# range, so a single NA no longer produces a non-finite `ylim`.
my.interaction.plot <- function(column.to.describe, afactor1, afactor2, legend.label = '', xlab = '', ylab = '', main = ''){
  # Colours, line types and symbols are defined for two trace levels;
  # with more levels they are recycled.
  interaction.plot(
    afactor1, afactor2, column.to.describe,
    type = "b", col = c("red", "blue"),
    legend = TRUE, trace.label = legend.label,
    lty = c(1, 2), lwd = 2, pch = c(18, 24),
    xlab = xlab, ylab = ylab,
    main = main,
    ylim = range(column.to.describe, na.rm = TRUE)
  )
}
Παράδειγμα
answer = c("Όχι", "Όχι", "Ίσως", "Ναι", "Όχι", "Ναι", "Ναι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Όχι")
gender = c("Αγόρι", "Κορίτσι", "Κορίτσι", "Κορίτσι", "Αγόρι", "Αγόρι", "Αγόρι", "Αγόρι", "Κορίτσι", "Κορίτσι", "Κορίτσι", "Αγόρι")
score = c(13, 14, 20, 20, 19, 18, 13, 10, 9, 12, 15, 15)
my.interaction.plot(score, answer, gender)
Διάγραμμα
αλληλεπίδρασης ενός παράγοντα και επαναλαμβανόμενων μετρήσεων πάνω σε
μία συνεχή μεταβλητή (Repeated Measures Interaction Plot)
Παράδειγμα
# Example: interaction plot of a between-subjects factor (gender) against
# three repeated weight measurements, drawn with base graphics.
gender <- c(1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0)
gender <- factor(gender, levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας"))
weight1 <- c(89.8, 107.5, 105.7, 81.2, 99.3, 76.7, 100.7, 75.7, 90.3, 105.7, 81.2, 71.7, 71.2, 98, 116.6, 68.5)
weight2 <- c(90.8, 108.5, 106.7, 80.2, 100.3, 75.7, 101.7, 74.8, 91.3, 106.7, 80.2, 70.7, 70.2, 99, 117.6, 67.5)
weight3 <- c(91.8, 109.5, 107.7, 79.2, 101.3, 74.7, 102.7, 73.7, 92.3, 107.7, 79.2, 69.7, 69.2, 100, 118.6, 66.5)
dataF <- data.frame(gender, weight1, weight2, weight3)
repeated.ANOVA <- my_Repeated_ANOVA(dataF, betweencols = c('gender'), withincols = c('weight1', 'weight2', 'weight3'))
# The long-format data (columns used below: time, gender, DependentScore)
data.long <- repeated.ANOVA$data.long
# Relabel the measurement occasions from weight1, weight2, weight3 to 1η, 2η, 3η
data.long$time <- plyr::revalue(data.long$time, c("weight1"="1η", "weight2"="2η", "weight3"="3η"))
# Pad the y axis by 5% beyond the observed minimum and maximum
min.all <- min(data.long$DependentScore)
lower.y.limit <- min.all - 0.05 * abs(min.all)
max.all <- max(data.long$DependentScore)
upper.y.limit <- max.all + 0.05 * abs(max.all)
# Widen the left margin and allow drawing outside the plot region so the
# legend fits beside the plot; remember the previous settings to restore them.
old.par <- par(mar = c(5.1, 14.1, 4.1, 2.1), xpd = TRUE)
cols <- c("brown1", "cadetblue3")
interaction.plot(data.long$time, data.long$gender,
                 data.long$DependentScore, type = "b", col = cols,
                 legend = FALSE, lty = c(1, 2), lwd = 2, pch = c(18, 24),
                 xlab = "", ylab = "Βάρος",
                 ylim = c(lower.y.limit, upper.y.limit))
# Hand-placed legend in the widened left margin (negative inset moves it outside the axes)
legend("left", c("Γυναίκα", "Άνδρας"), bty = "n", lty = c(1, 2), lwd = 2, pch = c(18, 24), col = cols, title = "Φύλο", inset = c(-0.55, 0.1))
# Restore the graphical parameters so later plots are not affected
par(old.par)
Παράδειγμα 2
# Example 2: the same repeated-measures interaction plot drawn with ggplot2,
# showing the mean weight per gender and measurement occasion.
gender <- c(1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0)
gender <- factor(gender, levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας"))
weight1 <- c(89.8, 107.5, 105.7, 81.2, 99.3, 76.7, 100.7, 75.7, 90.3, 105.7, 81.2, 71.7, 71.2, 98, 116.6, 68.5)
weight2 <- c(90.8, 108.5, 106.7, 80.2, 100.3, 75.7, 101.7, 74.8, 91.3, 106.7, 80.2, 70.7, 70.2, 99, 117.6, 67.5)
weight3 <- c(91.8, 109.5, 107.7, 79.2, 101.3, 74.7, 102.7, 73.7, 92.3, 107.7, 79.2, 69.7, 69.2, 100, 118.6, 66.5)
dataF <- data.frame(gender, weight1, weight2, weight3)
repeated.ANOVA <- my_Repeated_ANOVA(dataF, betweencols = c('gender'), withincols = c('weight1', 'weight2', 'weight3'))
data.long <- repeated.ANOVA$data.long
# Relabel the measurement occasions from weight1, weight2, weight3 to 1η, 2η, 3η
data.long$time <- plyr::revalue(data.long$time, c("weight1"="1η", "weight2"="2η", "weight3"="3η"))
library(dplyr)
library(ggplot2)
# Mean weight per gender x time cell; .groups = "drop" returns an ungrouped
# table and avoids the regrouping message from summarise().
data.long2 <- data.long %>%
  group_by(gender, time) %>%
  summarise(weight = mean(DependentScore), .groups = "drop")
#data.long2$gender <- factor(data.long2$gender, levels = rev(levels(data.long2$gender)))
# Pad the y axis by 5% beyond the observed minimum and maximum of the raw scores
min.all <- min(data.long$DependentScore)
lower.y.limit <- min.all - 0.05 * abs(min.all)
max.all <- max(data.long$DependentScore)
upper.y.limit <- max.all + 0.05 * abs(max.all)
data.long2 %>%
  ggplot() +
  aes(x = time, y = weight, color = gender, label = round(weight, 1)) +
  geom_line(aes(group = gender)) +
  scale_color_brewer(palette = "Set1") +
  geom_point() +
  geom_text(hjust = 0.2, vjust = -1) +
  xlab("Μέτρηση") +
  ylab("Βάρος") +
  ylim(lower.y.limit, upper.y.limit) +
  labs(color = 'Φύλο') +
  guides(color = guide_legend(override.aes = list(fill = NA))) +
  theme(legend.position = "left",
        legend.text = element_text(size = 13),
        legend.title = element_text(size = 13),
        axis.title = element_text(size = 13),
        axis.text = element_text(size = 13),
        axis.text.x = element_text(colour = "black"),
        axis.text.y = element_text(colour = "black"),
        panel.background = element_rect(fill = 'transparent', color = 'black'),
        plot.background = element_rect(fill = 'transparent', color = NA))
Διάγραμμα διασποράς
(Scatterplot) (συνάρτηση my_scatterplot)
Παράδειγμα
valueX = rnorm(1000, 0, 1)
valueY = rbinom(1000, 40, 0.2)
valuecolor = as.factor(c(rep(1, 400), rep(2, 300), rep(3, 300)))
my_scatterplot(x = valueX, y = valueY, color = valuecolor, histograms = TRUE)
Παράδειγμα 2 (χωρίς σημεία)
# Example 2: one fitted regression line per colour group, without the points.
valueX <- rnorm(1000, 0, 1)
valueY <- rbinom(1000, 40, 0.2)
valuecolor <- as.factor(c(rep(1, 400), rep(2, 300), rep(3, 300)))
data.sample <- data.frame(valueX, valueY, valuecolor)
library(ggplot2)
theme_set(theme_bw(16))
# Axis and legend titles must be strings: passing the data vectors themselves
# to labs() would use all 1000 values as the title text.
ggplot(data.sample, aes(x = valueX, y = valueY, color = valuecolor)) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_color_brewer(palette = "Set1") +
  labs(x = "valueX", y = "valueY", colour = "valuecolor") +
  theme(legend.position = "left")
Logistic Regression
(συνάρτηση my_logistic_regression)
Παράδειγμα
income = c(31, 55, 120, 25, 38, 16, 23, 64, 29, 100, 72, 61, 26, 176, 49, 25, 67, 28, 19, 41)
debtinc = c(17, 6, 3, 10, 4, 2, 5, 10, 16, 9, 8, 6, 2, 9, 9, 20, 31, 17, 24, 16)
default = factor(c(1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1), labels = c('Κανονική αποπληρωμή', 'Αδυναμία αποπληρωμής'))
lr.data.frame = data.frame(income, debtinc, default)
Hmisc::label(lr.data.frame$income) = "Οικογενειακό εισόδημα"
Hmisc::label(lr.data.frame$debtinc) = "Χρέος ως ποσοστό του εισοδήματος"
Hmisc::label(lr.data.frame$default) = "Αδυναμία αποπληρωμής"
my.model = my_logistic_regression(lr.data.frame, default ~ income + debtinc)
my.model$htmlreport
Coding: 0, 1 = Κανονική αποπληρωμή, Αδυναμία αποπληρωμής If
you need reverse order please run the function by setting reverse.levels
= TRUE
|
Logistic regression results for Αδυναμία
αποπληρωμής Model: Αδυναμία αποπληρωμής ~ Οικογενειακό
εισόδημα + Χρέος ως ποσοστό του εισοδήματος
|
|
|
|
95% C.I.
|
|
B
|
SE
|
z
|
p
|
Exp(B)
|
|
Lower
|
Upper
|
Constant
|
0.69
|
1.839
|
0.375
|
0.708
|
1.993
|
|
0.054
|
73.239
|
Οικογενειακό εισόδημα
|
-0.089
|
0.053
|
-1.668
|
0.095
|
0.915
|
|
0.824
|
1.016
|
Χρέος ως ποσοστό του εισοδήματος
|
0.264
|
0.124
|
2.127
|
0.033
|
1.302
|
|
1.021
|
1.66
|
Nagelkerke Coefficient of determination R2 = 0.721
|
Area Under Curve AUC = 0.939
|
Prediction quality of Αδυναμία αποπληρωμής at threshold 0.5
|
|
Observed
|
|
|
|
Κανονική αποπληρωμή
|
Αδυναμία αποπληρωμής
|
|
Sum
|
Predicted
|
Κανονική αποπληρωμή
|
10
|
2
|
|
12
|
Αδυναμία αποπληρωμής
|
1
|
7
|
|
8
|
Sum
|
11
|
9
|
|
20
|
Sensitivity (SNS): 0.778 Specificity (SPC):
0.909
|
You may also consider other thresholds by setting e.g. threshold
= c(0.3, 0.7, 0.9)
|
my.model$ROCplot
round(my.model$ROCplot$data, 3)
## threshold specificity sensitivity 1-specificity
## 21 Inf 1.000 0.000 0.000
## 20 0.986 1.000 0.111 0.000
## 19 0.962 1.000 0.222 0.000
## 18 0.942 1.000 0.333 0.000
## 17 0.927 1.000 0.444 0.000
## 16 0.914 1.000 0.556 0.000
## 15 0.845 1.000 0.667 0.000
## 14 0.764 1.000 0.778 0.000
## 13 0.620 0.909 0.778 0.091
## 12 0.469 0.818 0.778 0.182
## 11 0.349 0.818 0.889 0.182
## 10 0.232 0.727 0.889 0.273
## 9 0.188 0.636 0.889 0.364
## 8 0.124 0.636 1.000 0.364
## 7 0.076 0.545 1.000 0.455
## 6 0.054 0.455 1.000 0.545
## 5 0.033 0.364 1.000 0.636
## 4 0.015 0.273 1.000 0.727
## 3 0.001 0.182 1.000 0.818
## 2 0.000 0.091 1.000 0.909
## 1 -Inf 0.000 1.000 1.000
Principal Components
Analysis (PCA) (συνάρτηση my_PCA_analysis)
Παράδειγμα
item1 = c(1, 5, 4, 3, 4, 5, 2, 1, 3, 5)
item2 = c(3, 3, 4, 2, 5, 3, 2, 3, 2, 2)
item3 = c(2, 4, 3, 3, 3, 2, 4, 3, 4, 3)
PCA.data = data.frame(item1, item2, item3)
############################
# 1η Εφαρμογή PCA.analysis #
############################
PCA.results = my_PCA_analysis(PCA.data)
print(PCA.results$corr_matrix)
## q1 q2 q3
## q1 1.00000000 0.1639973 0.06726728
## q2 0.16399730 1.0000000 -0.28771371
## q3 0.06726728 -0.2877137 1.00000000
print(PCA.results$corplot)
print(PCA.results$nfactors)
## [1] 2
print(PCA.results$communalities)
## $communality
## q1 q2 q3
## 0.8890525 0.7115495 0.7622915
print(PCA.results$eigenvalues)
## Eigenvalue Percent of Variance Cumulative Percent
## 1 1.306 43.5% 43.5%
## 2 1.057 35.2% 78.7%
## 3 0.637 21.2% 99.9%
print(PCA.results$loadings)
##
## Loadings:
## PC1 PC2
## q1 0.289 0.897
## q2 0.836 0.113
## q3 -0.723 0.489
##
## PC1 PC2
## SS loadings 1.306 1.057
## Proportion Var 0.435 0.352
## Cumulative Var 0.435 0.788
print(PCA.results$predicted.values)
## PC1 PC2
## 1 0.5648414 -1.9248768
## 2 -0.3707910 1.4958717
## 3 0.8823299 0.4341975
## 4 -0.5468449 -0.3215270
## 5 1.5262000 0.5412219
## 6 1.1305797 0.2418257
## 7 -1.4389649 -0.2361796
## 8 -0.1858440 -1.2978538
## 9 -1.2975303 0.3054960
## 10 -0.2639758 0.7618243
PCA.results$htmlTable
Variables
|
Original Item
|
q1
|
item1
|
q2
|
item2
|
q3
|
item3
|
Correlation Matrix
|
q1
|
q2
|
q3
|
q1
|
1
|
0.164
|
0.067
|
q2
|
0.164
|
1
|
-0.288
|
q3
|
0.067
|
-0.288
|
1
|
Number of factors extracted
Total Variance Explained
|
Eigenvalue
|
Percent of Variance
|
Cumulative Percent
|
1
|
1.306
|
43.5%
|
43.5%
|
2
|
1.057
|
35.2%
|
78.7%
|
3
|
0.637
|
21.2%
|
99.9%
|
Communalities
|
communality
|
q1
|
0.889
|
q2
|
0.712
|
q3
|
0.762
|
Component Matrix
|
PC1
|
PC2
|
q1
|
0.289
|
0.897
|
q2
|
0.836
|
0.113
|
q3
|
-0.723
|
0.489
|
############################
# 2η Εφαρμογή PCA.analysis #
############################
PCA.results = my_PCA_analysis(PCA.data, rotate = 'varimax')
PCA.results$htmlTable
Variables
|
Original Item
|
q1
|
item1
|
q2
|
item2
|
q3
|
item3
|
Correlation Matrix
|
q1
|
q2
|
q3
|
q1
|
1
|
0.164
|
0.067
|
q2
|
0.164
|
1
|
-0.288
|
q3
|
0.067
|
-0.288
|
1
|
Number of factors extracted
Total Variance Explained
|
Eigenvalue
|
Percent of Variance
|
Cumulative Percent
|
1
|
1.306
|
43.5%
|
43.5%
|
2
|
1.057
|
35.2%
|
78.7%
|
3
|
0.637
|
21.2%
|
99.9%
|
Communalities
|
communality
|
q1
|
0.889
|
q2
|
0.712
|
q3
|
0.762
|
Component Matrix
|
PC1
|
PC2
|
q1
|
0.289
|
0.897
|
q2
|
0.836
|
0.113
|
q3
|
-0.723
|
0.489
|
Rotated Component Matrix
|
RC1
|
RC2
|
q1
|
0.001
|
0.943
|
q2
|
-0.761
|
0.364
|
q3
|
0.839
|
0.243
|
Component Transformation Matrix
|
RC1
|
RC2
|
1
|
0.952
|
0.308
|
2
|
-0.308
|
0.952
|
############################
# 3η Εφαρμογή PCA.analysis #
############################
PCA.results = my_PCA_analysis(PCA.data, rotate = 'varimax', sort.loadings = TRUE)
PCA.results$htmlTable
Variables
|
Original Item
|
q1
|
item1
|
q2
|
item2
|
q3
|
item3
|
Correlation Matrix
|
q1
|
q2
|
q3
|
q1
|
1
|
0.164
|
0.067
|
q2
|
0.164
|
1
|
-0.288
|
q3
|
0.067
|
-0.288
|
1
|
Number of factors extracted
Total Variance Explained
|
Eigenvalue
|
Percent of Variance
|
Cumulative Percent
|
1
|
1.306
|
43.5%
|
43.5%
|
2
|
1.057
|
35.2%
|
78.7%
|
3
|
0.637
|
21.2%
|
99.9%
|
Communalities
|
communality
|
q1
|
0.889
|
q2
|
0.712
|
q3
|
0.762
|
Component Matrix
|
PC1
|
PC2
|
q1
|
0.289
|
0.897
|
q2
|
0.836
|
0.113
|
q3
|
-0.723
|
0.489
|
Rotated Component Matrix
|
RC1
|
RC2
|
q1
|
0.001
|
0.943
|
q2
|
-0.761
|
0.364
|
q3
|
0.839
|
0.243
|
Component Transformation Matrix
|
RC1
|
RC2
|
1
|
0.952
|
0.308
|
2
|
-0.308
|
0.952
|
############################
# 4η Εφαρμογή PCA.analysis #
############################
# Αν eigeinvalues.limit < 0, τότε βρίσκει το βέλτιστο πλήθος παραγόντων εφαρμόζοντας παράλληλη ανάλυση...
PCA.results = my_PCA_analysis(PCA.data, eigeinvalues.limit = -1)
## Parallel analysis suggests that the number of factors = 0 and the number of components = 0
PCA.results$htmlTable
Variables
|
Original Item
|
q1
|
item1
|
q2
|
item2
|
q3
|
item3
|
Correlation Matrix
|
q1
|
q2
|
q3
|
q1
|
1
|
0.164
|
0.067
|
q2
|
0.164
|
1
|
-0.288
|
q3
|
0.067
|
-0.288
|
1
|
Number of factors extracted
Total Variance Explained
|
Eigenvalue
|
Percent of Variance
|
Cumulative Percent
|
1
|
1.306
|
43.5%
|
43.5%
|
2
|
1.057
|
35.2%
|
78.7%
|
3
|
0.637
|
21.2%
|
99.9%
|
Communalities
|
communality
|
q1
|
1
|
q2
|
1
|
q3
|
1
|
Component Matrix
|
PC1
|
PC2
|
PC3
|
q1
|
0.289
|
0.897
|
-0.333
|
q2
|
0.836
|
0.113
|
0.537
|
q3
|
-0.723
|
0.489
|
0.488
|
Ανάλυση κλίμακας
Likert
Ομαδοποίηση
αποκρίσεων με δενδρόγραμμα (συνάρτηση my_dendrogram)
Η απόσταση δύο μεταβλητών μπορεί να υπολογιστεί με πολλούς τρόπους. Η
παρακάτω συνάρτηση υλοποιεί τον υπολογισμό των αποστάσεων με την
ευκλείδεια απόσταση και με την απόσταση Hamming η οποία είναι κατάλληλη
για μεταβλητές με τιμές 0 και 1 και ορίζεται ως το πλήθος των τιμών που
δεν είναι ίσες μεταξύ των δύο διανυσμάτων. Η παρακάτω συνάρτηση
υπολογίζει τον πίνακα των αποστάσεων για ένα dataframe και δημιουργεί το
δενδρόγραμμα των μεταβλητών, επιστρέφοντας και τον αντίστοιχο πίνακα
αποστάσεων.
Παράδειγμα 1
Item1 = c(0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1)
Item2 = c(0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1)
Item3 = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1)
my.data.frame = data.frame(Item1, Item2, Item3)
my.d = my_dendrogram(my.data.frame, plot.labels = c("Item 1", "Item 2", "Item 3"), type = 'hamming')
my.d$distMat
Variables
|
Original Item
|
q1
|
Item1
|
q2
|
Item2
|
q3
|
Item3
|
Hamming distance
|
Cols
|
|
Item 1
|
Item 2
|
Item 3
|
Rows
|
Item 1
|
0
|
4
|
6
|
Item 2
|
4
|
0
|
6
|
Item 3
|
6
|
6
|
0
|
Παράδειγμα 2
Item1 = c(0, 5, 4, 3, 1, 2, 2, 1, 1, 0, 0, 1)
Item2 = c(5, 4, 4, 1, 1, 1, 1, 5, 5, 4, 3, 3)
Item3 = c(1, 2, 3, 3, 2, 1, 1, 0, 0, 0, 0, 1)
Item4 = c(1, 2, 3, 3, 2, 1, 1, 0, 0, 0, 0, 1)
my.data.frame = data.frame(Item1, Item2, Item3, Item4)
my.d = my_dendrogram(my.data.frame, plot.labels = c("Item 1", "Item 2", "Item 3", "Item 4"), type = 'euclidean', groups = 2)
my.d$distMat
Variables
|
Original Item
|
q1
|
Item1
|
q2
|
Item2
|
q3
|
Item3
|
q4
|
Item4
|
Euclidean distance
|
Cols
|
|
Item 1
|
Item 2
|
Item 3
|
Item 4
|
Rows
|
Item 1
|
0
|
9.6
|
4
|
4
|
Item 2
|
9.6
|
0
|
10.2
|
10.2
|
Item 3
|
4
|
10.2
|
0
|
0
|
Item 4
|
4
|
10.2
|
0
|
0
|
Πίνακας συχνοτήτων ή
σχετικών συχνοτήτων των αποκρίσεων (συνάρτηση
my_likert_scale_description)
Παράδειγμα
Item1 = c(0, 5, 3, 2, 1, 5, 2, 1, 3, 3, 4, 4)
Item2 = c(0, 1, 0, 2, 0, 1, 1, 1, 2, 2, 2, 2)
Item3 = c(3, 5, 4, 4, 2, 2, 4, 5, 3, 3, 4, 3)
my.data.frame = data.frame(Item1, Item2, Item3)
my_likert_scale_description(data = my.data.frame, type = 'freq')
|
Item
|
Responses
|
0
|
1
|
2
|
3
|
4
|
5
|
M (SD)
|
1
|
Item1
|
12
|
1
|
2
|
2
|
3
|
2
|
2
|
2.8 (1.6)
|
2
|
Item2
|
12
|
3
|
4
|
5
|
0
|
0
|
0
|
1.2 (0.83)
|
3
|
Item3
|
12
|
0
|
0
|
2
|
4
|
4
|
2
|
3.5 (1.0)
|
my_likert_scale_description(data = my.data.frame, type = 'percent')
|
Item
|
Responses
|
0
|
1
|
2
|
3
|
4
|
5
|
M (SD)
|
1
|
Item1
|
12
|
8.3%
|
16.7%
|
16.7%
|
25%
|
16.7%
|
16.7%
|
2.8 (1.6)
|
2
|
Item2
|
12
|
25%
|
33.3%
|
41.7%
|
0%
|
0%
|
0%
|
1.2 (0.83)
|
3
|
Item3
|
12
|
0%
|
0%
|
16.7%
|
33.3%
|
33.3%
|
16.7%
|
3.5 (1.0)
|
my_likert_scale_description(data = my.data.frame, type = 'both')
|
Item
|
Responses
|
0
|
1
|
2
|
3
|
4
|
5
|
M (SD)
|
1
|
Item1
|
12
|
1 (8.3%)
|
2 (16.7%)
|
2 (16.7%)
|
3 (25%)
|
2 (16.7%)
|
2 (16.7%)
|
2.8 (1.6)
|
2
|
Item2
|
12
|
3 (25%)
|
4 (33.3%)
|
5 (41.7%)
|
0 (0%)
|
0 (0%)
|
0 (0%)
|
1.2 (0.83)
|
3
|
Item3
|
12
|
0 (0%)
|
0 (0%)
|
2 (16.7%)
|
4 (33.3%)
|
4 (33.3%)
|
2 (16.7%)
|
3.5 (1.0)
|
Υπολογισμός
ομοιότητας αποκρίσεων μεταξύ των items (συνάρτηση
response_similarity_among_items)
Παράδειγμα
Item1 = c(0, 5, 3, 2, 1, 5, 2, 1, 3, 3, 4, 4)
Item2 = c(0, 1, 0, 2, 0, 1, 1, 1, 2, 2, 2, 2)
Item3 = c(3, 5, 4, 4, 2, 2, 4, 5, 3, 3, 4, 3)
Item4 = c(1, 1, 2, 3, 3, 2, 1, 1, 2, 2, 2, 2)
my.data.frame = data.frame(Item1, Item2, Item3, Item4)
similarity.report = response_similarity_among_items(c("Item1", "Item2", "Item3", "Item4"), my.data.frame, acceptable.difference = 0)
htmlTable::htmlTable(similarity.report$items.mean)
|
Item
|
Mean
|
1
|
Item1
|
2.75
|
2
|
Item2
|
1.17
|
3
|
Item3
|
3.5
|
4
|
Item4
|
1.83
|
htmlTable::htmlTable(similarity.report$agreement.df)
|
Item1
|
Item2
|
Item3
|
Item4
|
Item1
|
100
|
25
|
33.3
|
8.3
|
Item2
|
25
|
100
|
0
|
58.3
|
Item3
|
33.3
|
0
|
100
|
8.3
|
Item4
|
8.3
|
58.3
|
8.3
|
100
|
htmlTable::htmlTable(similarity.report$agreement.descending)
|
Item1
|
Item2
|
Agreement
|
8
|
Item4
|
Item2
|
58.3
|
3
|
Item3
|
Item1
|
33.3
|
2
|
Item2
|
Item1
|
25
|
4
|
Item4
|
Item1
|
8.3
|
12
|
Item4
|
Item3
|
8.3
|
htmlTable::htmlTable(similarity.report$difference.df)
|
Item1
|
Item2
|
Item3
|
Item4
|
Item1
|
0
|
1.58
|
-0.75
|
0.92
|
Item2
|
-1.58
|
0
|
-2.33
|
-0.67
|
Item3
|
0.75
|
2.33
|
0
|
1.67
|
Item4
|
-0.92
|
0.67
|
-1.67
|
0
|
htmlTable::htmlTable(similarity.report$difference.descending)
|
Item1
|
Item2
|
Difference
|
7
|
Item3
|
Item2
|
2.33
|
3
|
Item3
|
Item1
|
0.75
|
8
|
Item4
|
Item2
|
0.67
|
1
|
Item1
|
Item1
|
0
|
6
|
Item2
|
Item2
|
0
|
11
|
Item3
|
Item3
|
0
|
16
|
Item4
|
Item4
|
0
|
4
|
Item4
|
Item1
|
-0.92
|
2
|
Item2
|
Item1
|
-1.58
|
12
|
Item4
|
Item3
|
-1.67
|
htmlTable::htmlTable(similarity.report$difference.significances)
|
Item1
|
Item2
|
Item3
|
Item4
|
Item1
|
|
0.003
|
0.191
|
0.094
|
Item2
|
0.003
|
|
0
|
0.039
|
Item3
|
0.191
|
0
|
|
0.003
|
Item4
|
0.094
|
0.039
|
0.003
|
|
htmlTable::htmlTable(similarity.report$spearman.corr.r)
|
Item1
|
Item2
|
Item3
|
Item4
|
Item1
|
1
|
0.408
|
0.015
|
0.066
|
Item2
|
0.408
|
1
|
0.066
|
0.272
|
Item3
|
0.015
|
0.066
|
1
|
-0.493
|
Item4
|
0.066
|
0.272
|
-0.493
|
1
|
htmlTable::htmlTable(similarity.report$spearman.corr.n)
|
Item1
|
Item2
|
Item3
|
Item4
|
Item1
|
12
|
12
|
12
|
12
|
Item2
|
12
|
12
|
12
|
12
|
Item3
|
12
|
12
|
12
|
12
|
Item4
|
12
|
12
|
12
|
12
|
htmlTable::htmlTable(similarity.report$spearman.corr.sig)
|
Item1
|
Item2
|
Item3
|
Item4
|
Item1
|
|
0.188
|
0.964
|
0.839
|
Item2
|
0.188
|
|
0.839
|
0.392
|
Item3
|
0.964
|
0.839
|
|
0.104
|
Item4
|
0.839
|
0.392
|
0.104
|
|
htmlTable::htmlTable(similarity.report$spearman.corr.descenting)
|
Item1
|
Item2
|
Spearman
|
1
|
Item1
|
Item1
|
1
|
6
|
Item2
|
Item2
|
1
|
11
|
Item3
|
Item3
|
1
|
16
|
Item4
|
Item4
|
1
|
2
|
Item2
|
Item1
|
0.408
|
8
|
Item4
|
Item2
|
0.272
|
7
|
Item3
|
Item2
|
0.066
|
4
|
Item4
|
Item1
|
0.066
|
3
|
Item3
|
Item1
|
0.015
|
12
|
Item4
|
Item3
|
-0.493
|
htmlTable::htmlTable(similarity.report$pearson.corr.r)
|
Item1
|
Item2
|
Item3
|
Item4
|
Item1
|
1
|
0.442
|
0.028
|
0.04
|
Item2
|
0.442
|
1
|
0.109
|
0.202
|
Item3
|
0.028
|
0.109
|
1
|
-0.507
|
Item4
|
0.04
|
0.202
|
-0.507
|
1
|
htmlTable::htmlTable(similarity.report$pearson.corr.n)
|
Item1
|
Item2
|
Item3
|
Item4
|
Item1
|
12
|
12
|
12
|
12
|
Item2
|
12
|
12
|
12
|
12
|
Item3
|
12
|
12
|
12
|
12
|
Item4
|
12
|
12
|
12
|
12
|
htmlTable::htmlTable(similarity.report$pearson.corr.sig)
|
Item1
|
Item2
|
Item3
|
Item4
|
Item1
|
|
0.151
|
0.93
|
0.903
|
Item2
|
0.151
|
|
0.736
|
0.528
|
Item3
|
0.93
|
0.736
|
|
0.093
|
Item4
|
0.903
|
0.528
|
0.093
|
|
htmlTable::htmlTable(similarity.report$pearson.corr.descenting)
|
Item1
|
Item2
|
Pearson
|
1
|
Item1
|
Item1
|
1
|
6
|
Item2
|
Item2
|
1
|
11
|
Item3
|
Item3
|
1
|
16
|
Item4
|
Item4
|
1
|
2
|
Item2
|
Item1
|
0.442
|
8
|
Item4
|
Item2
|
0.202
|
7
|
Item3
|
Item2
|
0.109
|
4
|
Item4
|
Item1
|
0.04
|
3
|
Item3
|
Item1
|
0.028
|
12
|
Item4
|
Item3
|
-0.507
|
Γλωσσική ομοιότητα
μεταξύ των ερωτήσεων (cosine και όλες οι μέθοδοι που υποστηρίζει η
βιβλιοθήκη stringdist) (συναρτήσεις
word_similarity_among_items_stringdist και
word_similarity_among_items)
# Computes only the cosine similarity (on TF-IDF weights) between sentences.
#
# Arguments:
#   sentences  character vector, one sentence per item.
#
# Returns:
#   A data frame with one row and one column per sentence, named Item1,
#   Item2, ...; cell [i, j] is the cosine similarity, rounded to 3 decimals,
#   between sentence i (used as the reference) and sentence j.
#
# Details:
#   Sentences are lower-cased and split on whitespace only, so punctuation
#   stays attached to the words. For each reference sentence a copy of it is
#   appended to the corpus as an extra document ("Check") before the weights
#   are computed; the inverse document frequencies therefore depend on the
#   reference sentence and the result need not be symmetric. A word that
#   occurs in every document gets weight 0, so a sentence made up only of such
#   words yields NaN (this always happens when a single sentence is supplied).
#   Empty input returns an empty data frame.
word_similarity_among_items <- function(sentences) {
  # Approach adapted from:
  # https://stackoverflow.com/questions/57092479/finding-the-cosine-similarity-of-a-sentence-with-many-others-in-r
  n.items <- length(sentences)
  if (n.items == 0) {
    return(data.frame())
  }
  item.names <- paste0("Item", seq_len(n.items))
  names(sentences) <- item.names

  # Preallocate the result instead of growing a data frame row by row
  similarity <- matrix(NA_real_, nrow = n.items, ncol = n.items,
                       dimnames = list(item.names, item.names))

  for (i in seq_len(n.items)) {
    # Corpus = all sentences plus the reference sentence as the last document
    documents <- c(sentences, Check = sentences[[i]])
    check.col <- length(documents)
    tokens <- strsplit(tolower(documents), "\\s+")
    # Term-by-document frequency table (rows = words, columns = documents)
    term.freq <- table(stack(tokens))
    idf <- log(1 / rowMeans(term.freq != 0))
    tfidf <- term.freq * idf
    reference <- tfidf[, check.col]
    # drop = FALSE keeps a one-column matrix when only one sentence is given
    others <- tfidf[, -check.col, drop = FALSE]
    dot.products <- t(reference) %*% others
    norms <- sqrt(colSums(others^2)) * sqrt(sum(reference^2))
    similarity[i, ] <- round(as.vector(dot.products / norms), 3)
  }
  as.data.frame(similarity)
}
Παράδειγμα
GOHAI.items.en = c("How often did you limit the kinds or amounts of food you eat because of problems with your teeth or dentures?",
"How often do you have trouble biting or chewing any kinds of food, such as tough meat or apples?",
"How often were you able to swallow comfortably?",
"How often have your teeth or dentures prevented you from speaking the way you wanted?",
"How often were you able to eat anything without feeling discomfort?",
"How often did you limit contacts with people because of the condition of your teeth or dentures?",
"How often were you pleased or happy with the looks of your teeth and gums, or dentures?",
"How often did you use medication to relieve pain or discomfort from around your mouth?",
"How often were you worried or concerned about the problems of your teeth, gums or dentures?",
"How often did you feel nervous or self-conscious because of problems with your teeth, gums, or dentures?",
"How often did you feel uncomfortable eating in front of people because of problems with your teeth or dentures?",
"How often were your teeth or gums sensitive to hot, cold, or sweets?")
word_similarity_among_items_stringdist(GOHAI.items.en)
## Item1 Item2 Item3 Item4 Item5 Item6 Item7 Item8 Item9 Item10 Item11
## Item1 0.000 0.033 0.127 0.061 0.051 0.024 0.031 0.033 0.048 0.040 0.029
## Item2 0.033 0.000 0.132 0.088 0.064 0.054 0.047 0.060 0.090 0.070 0.058
## Item3 0.127 0.132 0.000 0.166 0.108 0.149 0.127 0.161 0.134 0.146 0.111
## Item4 0.061 0.088 0.166 0.000 0.065 0.069 0.028 0.110 0.048 0.080 0.048
## Item5 0.051 0.064 0.108 0.065 0.000 0.033 0.071 0.089 0.088 0.097 0.041
## Item6 0.024 0.054 0.149 0.069 0.033 0.000 0.055 0.055 0.068 0.063 0.032
## Item7 0.031 0.047 0.127 0.028 0.071 0.055 0.000 0.085 0.037 0.047 0.037
## Item8 0.033 0.060 0.161 0.110 0.089 0.055 0.085 0.000 0.073 0.077 0.064
## Item9 0.048 0.090 0.134 0.048 0.088 0.068 0.037 0.073 0.000 0.036 0.034
## Item10 0.040 0.070 0.146 0.080 0.097 0.063 0.047 0.077 0.036 0.000 0.037
## Item11 0.029 0.058 0.111 0.048 0.041 0.032 0.037 0.064 0.034 0.037 0.000
## Item12 0.052 0.092 0.149 0.068 0.073 0.056 0.043 0.111 0.045 0.045 0.061
## Item12
## Item1 0.052
## Item2 0.092
## Item3 0.149
## Item4 0.068
## Item5 0.073
## Item6 0.056
## Item7 0.043
## Item8 0.111
## Item9 0.045
## Item10 0.045
## Item11 0.061
## Item12 0.000
Γλωσσική ομοιότητα
μεταξύ των ερωτήσεων (πλήθος κοινών λέξεων + δείκτης ομοιότητας)
(συνάρτηση common_words_similarity_among_items)
Παράδειγμα
# Twelve English item wordings (one question per element) used as example input.
GOHAI.items.en <- c(
  "How often did you limit the kinds or amounts of food you eat because of problems with your teeth or dentures?",
  "How often do you have trouble biting or chewing any kinds of food, such as tough meat or apples?",
  "How often were you able to swallow comfortably?",
  "How often have your teeth or dentures prevented you from speaking the way you wanted?",
  "How often were you able to eat anything without feeling discomfort?",
  "How often did you limit contacts with people because of the condition of your teeth or dentures?",
  "How often were you pleased or happy with the looks of your teeth and gums, or dentures?",
  "How often did you use medication to relieve pain or discomfort from around your mouth?",
  "How often were you worried or concerned about the problems of your teeth, gums or dentures?",
  "How often did you feel nervous or self-conscious because of problems with your teeth, gums, or dentures?",
  "How often did you feel uncomfortable eating in front of people because of problems with your teeth or dentures?",
  "How often were your teeth or gums sensitive to hot, cold, or sweets?"
)
# Shared-word analysis of all item pairs. The result prints as a list whose
# components include the common-word counts, the common words themselves and
# a similarity index, each in wide (item x item) and long (pairwise) form.
common_words_similarity_among_items(GOHAI.items.en)
## $df.to.return.common.words.count
## Item1 Item2 Item3 Item4 Item5 Item6 Item7 Item8 Item9 Item10 Item11 Item12
## 1 21 8 3 9 4 14 11 6 11 13 13 6
## 2 8 19 3 5 3 5 6 4 6 6 5 4
## 3 3 3 8 3 6 3 4 4 4 3 3 4
## 4 9 5 3 15 3 8 8 6 8 7 7 5
## 5 4 3 6 3 11 3 4 5 4 3 3 4
## 6 14 5 3 8 3 17 10 6 9 11 13 5
## 7 11 6 4 8 4 10 17 5 12 11 9 8
## 8 6 4 4 6 5 6 5 15 5 6 6 5
## 9 11 6 4 8 4 9 12 5 16 11 9 8
## 10 13 6 3 7 3 11 11 6 11 17 13 7
## 11 13 5 3 7 3 13 9 6 9 13 19 5
## 12 6 4 4 5 4 5 8 5 8 7 5 13
##
## $common.words.count.long
## Var1 Var2 Freq
## 2 Item2 Item1 8
## 3 Item3 Item1 3
## 4 Item4 Item1 9
## 5 Item5 Item1 4
## 6 Item6 Item1 14
## 7 Item7 Item1 11
## 8 Item8 Item1 6
## 9 Item9 Item1 11
## 10 Item10 Item1 13
## 11 Item11 Item1 13
## 12 Item12 Item1 6
## 15 Item3 Item2 3
## 16 Item4 Item2 5
## 17 Item5 Item2 3
## 18 Item6 Item2 5
## 19 Item7 Item2 6
## 20 Item8 Item2 4
## 21 Item9 Item2 6
## 22 Item10 Item2 6
## 23 Item11 Item2 5
## 24 Item12 Item2 4
## 28 Item4 Item3 3
## 29 Item5 Item3 6
## 30 Item6 Item3 3
## 31 Item7 Item3 4
## 32 Item8 Item3 4
## 33 Item9 Item3 4
## 34 Item10 Item3 3
## 35 Item11 Item3 3
## 36 Item12 Item3 4
## 41 Item5 Item4 3
## 42 Item6 Item4 8
## 43 Item7 Item4 8
## 44 Item8 Item4 6
## 45 Item9 Item4 8
## 46 Item10 Item4 7
## 47 Item11 Item4 7
## 48 Item12 Item4 5
## 54 Item6 Item5 3
## 55 Item7 Item5 4
## 56 Item8 Item5 5
## 57 Item9 Item5 4
## 58 Item10 Item5 3
## 59 Item11 Item5 3
## 60 Item12 Item5 4
## 67 Item7 Item6 10
## 68 Item8 Item6 6
## 69 Item9 Item6 9
## 70 Item10 Item6 11
## 71 Item11 Item6 13
## 72 Item12 Item6 5
## 80 Item8 Item7 5
## 81 Item9 Item7 12
## 82 Item10 Item7 11
## 83 Item11 Item7 9
## 84 Item12 Item7 8
## 93 Item9 Item8 5
## 94 Item10 Item8 6
## 95 Item11 Item8 6
## 96 Item12 Item8 5
## 106 Item10 Item9 11
## 107 Item11 Item9 9
## 108 Item12 Item9 8
## 119 Item11 Item10 13
## 120 Item12 Item10 7
## 132 Item12 Item11 5
##
## $df.to.return.common.words.string
## Item1
## 1 amounts because dentures did eat food How kinds limit of of often or or problems teeth the with you you your
## 2 food How kinds of often or or you
## 3 How often you
## 4 dentures How often or teeth the you you your
## 5 eat How often you
## 6 because dentures did How limit of of often or teeth the with you your
## 7 dentures How of often or or teeth the with you your
## 8 did How often or you your
## 9 dentures How of often or or problems teeth the you your
## 10 because dentures did How of often or or problems teeth with you your
## 11 because dentures did How of of often or problems teeth with you your
## 12 How often or or teeth your
## Item2
## 1 food How kinds of often or or you
## 2 any apples as biting chewing do food have How kinds meat of often or or such tough trouble you
## 3 How often you
## 4 have How often or you
## 5 How often you
## 6 How of often or you
## 7 How of often or or you
## 8 How often or you
## 9 How of often or or you
## 10 How of often or or you
## 11 How of often or you
## 12 How often or or
## Item3
## 1 How often you
## 2 How often you
## 3 able comfortably How often swallow to were you
## 4 How often you
## 5 able How often to were you
## 6 How often you
## 7 How often were you
## 8 How often to you
## 9 How often were you
## 10 How often you
## 11 How often you
## 12 How often to were
## Item4
## 1 dentures How often or teeth the you you your
## 2 have How often or you
## 3 How often you
## 4 dentures from have How often or prevented speaking teeth the wanted way you you your
## 5 How often you
## 6 dentures How often or teeth the you your
## 7 dentures How often or teeth the you your
## 8 from How often or you your
## 9 dentures How often or teeth the you your
## 10 dentures How often or teeth you your
## 11 dentures How often or teeth you your
## 12 How often or teeth your
## Item5
## 1 eat How often you
## 2 How often you
## 3 able How often to were you
## 4 How often you
## 5 able anything discomfort eat feeling How often to were without you
## 6 How often you
## 7 How often were you
## 8 discomfort How often to you
## 9 How often were you
## 10 How often you
## 11 How often you
## 12 How often to were
## Item6
## 1 because dentures did How limit of of often or teeth the with you your
## 2 How of often or you
## 3 How often you
## 4 dentures How often or teeth the you your
## 5 How often you
## 6 because condition contacts dentures did How limit of of often or people teeth the with you your
## 7 dentures How of often or teeth the with you your
## 8 did How often or you your
## 9 dentures How of often or teeth the you your
## 10 because dentures did How of often or teeth with you your
## 11 because dentures did How of of often or people teeth with you your
## 12 How often or teeth your
## Item7
## 1 dentures How of often or or teeth the with you your
## 2 How of often or or you
## 3 How often were you
## 4 dentures How often or teeth the you your
## 5 How often were you
## 6 dentures How of often or teeth the with you your
## 7 and dentures gums happy How looks of often or or pleased teeth the were with you your
## 8 How often or you your
## 9 dentures gums How of often or or teeth the were you your
## 10 dentures gums How of often or or teeth with you your
## 11 dentures How of often or teeth with you your
## 12 gums How often or or teeth were your
## Item8
## 1 did How often or you your
## 2 How often or you
## 3 How often to you
## 4 from How often or you your
## 5 discomfort How often to you
## 6 did How often or you your
## 7 How often or you your
## 8 around did discomfort from How medication mouth often or pain relieve to use you your
## 9 How often or you your
## 10 did How often or you your
## 11 did How often or you your
## 12 How often or to your
## Item9
## 1 dentures How of often or or problems teeth the you your
## 2 How of often or or you
## 3 How often were you
## 4 dentures How often or teeth the you your
## 5 How often were you
## 6 dentures How of often or teeth the you your
## 7 dentures gums How of often or or teeth the were you your
## 8 How often or you your
## 9 about concerned dentures gums How of often or or problems teeth the were worried you your
## 10 dentures gums How of often or or problems teeth you your
## 11 dentures How of often or problems teeth you your
## 12 gums How often or or teeth were your
## Item10
## 1 because dentures did How of often or or problems teeth with you your
## 2 How of often or or you
## 3 How often you
## 4 dentures How often or teeth you your
## 5 How often you
## 6 because dentures did How of often or teeth with you your
## 7 dentures gums How of often or or teeth with you your
## 8 did How often or you your
## 9 dentures gums How of often or or problems teeth you your
## 10 because dentures did feel gums How nervous of often or or problems self-conscious teeth with you your
## 11 because dentures did feel How of often or problems teeth with you your
## 12 gums How often or or teeth your
## Item11
## 1 because dentures did How of of often or problems teeth with you your
## 2 How of often or you
## 3 How often you
## 4 dentures How often or teeth you your
## 5 How often you
## 6 because dentures did How of of often or people teeth with you your
## 7 dentures How of often or teeth with you your
## 8 did How often or you your
## 9 dentures How of often or problems teeth you your
## 10 because dentures did feel How of often or problems teeth with you your
## 11 because dentures did eating feel front How in of of often or people problems teeth uncomfortable with you your
## 12 How often or teeth your
## Item12
## 1 How often or or teeth your
## 2 How often or or
## 3 How often to were
## 4 How often or teeth your
## 5 How often to were
## 6 How often or teeth your
## 7 gums How often or or teeth were your
## 8 How often or to your
## 9 gums How often or or teeth were your
## 10 gums How often or or teeth your
## 11 How often or teeth your
## 12 cold gums hot How often or or sensitive sweets teeth to were your
##
## $common.words.string.long
## Var1 Var2
## 2 Item2 Item1
## 3 Item3 Item1
## 4 Item4 Item1
## 5 Item5 Item1
## 6 Item6 Item1
## 7 Item7 Item1
## 8 Item8 Item1
## 9 Item9 Item1
## 10 Item10 Item1
## 11 Item11 Item1
## 12 Item12 Item1
## 15 Item3 Item2
## 16 Item4 Item2
## 17 Item5 Item2
## 18 Item6 Item2
## 19 Item7 Item2
## 20 Item8 Item2
## 21 Item9 Item2
## 22 Item10 Item2
## 23 Item11 Item2
## 24 Item12 Item2
## 28 Item4 Item3
## 29 Item5 Item3
## 30 Item6 Item3
## 31 Item7 Item3
## 32 Item8 Item3
## 33 Item9 Item3
## 34 Item10 Item3
## 35 Item11 Item3
## 36 Item12 Item3
## 41 Item5 Item4
## 42 Item6 Item4
## 43 Item7 Item4
## 44 Item8 Item4
## 45 Item9 Item4
## 46 Item10 Item4
## 47 Item11 Item4
## 48 Item12 Item4
## 54 Item6 Item5
## 55 Item7 Item5
## 56 Item8 Item5
## 57 Item9 Item5
## 58 Item10 Item5
## 59 Item11 Item5
## 60 Item12 Item5
## 67 Item7 Item6
## 68 Item8 Item6
## 69 Item9 Item6
## 70 Item10 Item6
## 71 Item11 Item6
## 72 Item12 Item6
## 80 Item8 Item7
## 81 Item9 Item7
## 82 Item10 Item7
## 83 Item11 Item7
## 84 Item12 Item7
## 93 Item9 Item8
## 94 Item10 Item8
## 95 Item11 Item8
## 96 Item12 Item8
## 106 Item10 Item9
## 107 Item11 Item9
## 108 Item12 Item9
## 119 Item11 Item10
## 120 Item12 Item10
## 132 Item12 Item11
## Freq
## 2 food How kinds of often or or you
## 3 How often you
## 4 dentures How often or teeth the you you your
## 5 eat How often you
## 6 because dentures did How limit of of often or teeth the with you your
## 7 dentures How of often or or teeth the with you your
## 8 did How often or you your
## 9 dentures How of often or or problems teeth the you your
## 10 because dentures did How of often or or problems teeth with you your
## 11 because dentures did How of of often or problems teeth with you your
## 12 How often or or teeth your
## 15 How often you
## 16 have How often or you
## 17 How often you
## 18 How of often or you
## 19 How of often or or you
## 20 How often or you
## 21 How of often or or you
## 22 How of often or or you
## 23 How of often or you
## 24 How often or or
## 28 How often you
## 29 able How often to were you
## 30 How often you
## 31 How often were you
## 32 How often to you
## 33 How often were you
## 34 How often you
## 35 How often you
## 36 How often to were
## 41 How often you
## 42 dentures How often or teeth the you your
## 43 dentures How often or teeth the you your
## 44 from How often or you your
## 45 dentures How often or teeth the you your
## 46 dentures How often or teeth you your
## 47 dentures How often or teeth you your
## 48 How often or teeth your
## 54 How often you
## 55 How often were you
## 56 discomfort How often to you
## 57 How often were you
## 58 How often you
## 59 How often you
## 60 How often to were
## 67 dentures How of often or teeth the with you your
## 68 did How often or you your
## 69 dentures How of often or teeth the you your
## 70 because dentures did How of often or teeth with you your
## 71 because dentures did How of of often or people teeth with you your
## 72 How often or teeth your
## 80 How often or you your
## 81 dentures gums How of often or or teeth the were you your
## 82 dentures gums How of often or or teeth with you your
## 83 dentures How of often or teeth with you your
## 84 gums How often or or teeth were your
## 93 How often or you your
## 94 did How often or you your
## 95 did How often or you your
## 96 How often or to your
## 106 dentures gums How of often or or problems teeth you your
## 107 dentures How of often or problems teeth you your
## 108 gums How often or or teeth were your
## 119 because dentures did feel How of often or problems teeth with you your
## 120 gums How often or or teeth your
## 132 How often or teeth your
##
## $df.to.return.common.words.index
## Item1 Item2 Item3 Item4 Item5 Item6 Item7 Item8 Item9 Item10 Item11 Item12
## 1 1.000 0.400 0.207 0.500 0.250 0.737 0.579 0.333 0.595 0.684 0.650 0.353
## 2 0.400 1.000 0.222 0.294 0.200 0.278 0.333 0.235 0.343 0.333 0.263 0.250
## 3 0.207 0.222 1.000 0.261 0.632 0.240 0.320 0.348 0.333 0.240 0.222 0.381
## 4 0.500 0.294 0.261 1.000 0.231 0.500 0.500 0.400 0.516 0.438 0.412 0.357
## 5 0.250 0.200 0.632 0.231 1.000 0.214 0.286 0.385 0.296 0.214 0.200 0.333
## 6 0.737 0.278 0.240 0.500 0.214 1.000 0.588 0.375 0.545 0.647 0.722 0.333
## 7 0.579 0.333 0.320 0.500 0.286 0.588 1.000 0.312 0.727 0.647 0.500 0.533
## 8 0.333 0.235 0.348 0.400 0.385 0.375 0.312 1.000 0.323 0.375 0.353 0.357
## 9 0.595 0.343 0.333 0.516 0.296 0.545 0.727 0.323 1.000 0.667 0.514 0.552
## 10 0.684 0.333 0.240 0.438 0.214 0.647 0.647 0.375 0.667 1.000 0.722 0.467
## 11 0.650 0.263 0.222 0.412 0.200 0.722 0.500 0.353 0.514 0.722 1.000 0.312
## 12 0.353 0.250 0.381 0.357 0.333 0.333 0.533 0.357 0.552 0.467 0.312 1.000
##
## $common.words.index.long
## Var1 Var2 Freq
## 2 Item2 Item1 0.400
## 3 Item3 Item1 0.207
## 4 Item4 Item1 0.500
## 5 Item5 Item1 0.250
## 6 Item6 Item1 0.737
## 7 Item7 Item1 0.579
## 8 Item8 Item1 0.333
## 9 Item9 Item1 0.595
## 10 Item10 Item1 0.684
## 11 Item11 Item1 0.650
## 12 Item12 Item1 0.353
## 15 Item3 Item2 0.222
## 16 Item4 Item2 0.294
## 17 Item5 Item2 0.200
## 18 Item6 Item2 0.278
## 19 Item7 Item2 0.333
## 20 Item8 Item2 0.235
## 21 Item9 Item2 0.343
## 22 Item10 Item2 0.333
## 23 Item11 Item2 0.263
## 24 Item12 Item2 0.250
## 28 Item4 Item3 0.261
## 29 Item5 Item3 0.632
## 30 Item6 Item3 0.240
## 31 Item7 Item3 0.320
## 32 Item8 Item3 0.348
## 33 Item9 Item3 0.333
## 34 Item10 Item3 0.240
## 35 Item11 Item3 0.222
## 36 Item12 Item3 0.381
## 41 Item5 Item4 0.231
## 42 Item6 Item4 0.500
## 43 Item7 Item4 0.500
## 44 Item8 Item4 0.400
## 45 Item9 Item4 0.516
## 46 Item10 Item4 0.438
## 47 Item11 Item4 0.412
## 48 Item12 Item4 0.357
## 54 Item6 Item5 0.214
## 55 Item7 Item5 0.286
## 56 Item8 Item5 0.385
## 57 Item9 Item5 0.296
## 58 Item10 Item5 0.214
## 59 Item11 Item5 0.200
## 60 Item12 Item5 0.333
## 67 Item7 Item6 0.588
## 68 Item8 Item6 0.375
## 69 Item9 Item6 0.545
## 70 Item10 Item6 0.647
## 71 Item11 Item6 0.722
## 72 Item12 Item6 0.333
## 80 Item8 Item7 0.312
## 81 Item9 Item7 0.727
## 82 Item10 Item7 0.647
## 83 Item11 Item7 0.500
## 84 Item12 Item7 0.533
## 93 Item9 Item8 0.323
## 94 Item10 Item8 0.375
## 95 Item11 Item8 0.353
## 96 Item12 Item8 0.357
## 106 Item10 Item9 0.667
## 107 Item11 Item9 0.514
## 108 Item12 Item9 0.552
## 119 Item11 Item10 0.722
## 120 Item12 Item10 0.467
## 132 Item12 Item11 0.312
##
## $df.to.return.common.words.count.no.conjunctions
## Item1 Item2 Item3 Item4 Item5 Item6 Item7 Item8 Item9 Item10 Item11 Item12
## 1 19 6 3 8 4 13 9 5 9 11 12 4
## 2 6 17 3 4 3 4 4 3 4 4 4 2
## 3 3 3 8 3 6 3 4 4 4 3 3 4
## 4 8 4 3 14 3 7 7 5 7 6 6 4
## 5 4 3 6 3 11 3 4 5 4 3 3 4
## 6 13 4 3 7 3 16 9 5 8 10 12 4
## 7 9 4 4 7 4 9 14 4 10 9 8 6
## 8 5 3 4 5 5 5 4 14 4 5 5 4
## 9 9 4 4 7 4 8 10 4 14 9 8 6
## 10 11 4 3 6 3 10 9 5 9 15 12 5
## 11 12 4 3 6 3 12 8 5 8 12 18 4
## 12 4 2 4 4 4 4 6 4 6 5 4 11
##
## $common.words.count.no.conjunctions.long
## Var1 Var2 Freq
## 2 Item2 Item1 6
## 3 Item3 Item1 3
## 4 Item4 Item1 8
## 5 Item5 Item1 4
## 6 Item6 Item1 13
## 7 Item7 Item1 9
## 8 Item8 Item1 5
## 9 Item9 Item1 9
## 10 Item10 Item1 11
## 11 Item11 Item1 12
## 12 Item12 Item1 4
## 15 Item3 Item2 3
## 16 Item4 Item2 4
## 17 Item5 Item2 3
## 18 Item6 Item2 4
## 19 Item7 Item2 4
## 20 Item8 Item2 3
## 21 Item9 Item2 4
## 22 Item10 Item2 4
## 23 Item11 Item2 4
## 24 Item12 Item2 2
## 28 Item4 Item3 3
## 29 Item5 Item3 6
## 30 Item6 Item3 3
## 31 Item7 Item3 4
## 32 Item8 Item3 4
## 33 Item9 Item3 4
## 34 Item10 Item3 3
## 35 Item11 Item3 3
## 36 Item12 Item3 4
## 41 Item5 Item4 3
## 42 Item6 Item4 7
## 43 Item7 Item4 7
## 44 Item8 Item4 5
## 45 Item9 Item4 7
## 46 Item10 Item4 6
## 47 Item11 Item4 6
## 48 Item12 Item4 4
## 54 Item6 Item5 3
## 55 Item7 Item5 4
## 56 Item8 Item5 5
## 57 Item9 Item5 4
## 58 Item10 Item5 3
## 59 Item11 Item5 3
## 60 Item12 Item5 4
## 67 Item7 Item6 9
## 68 Item8 Item6 5
## 69 Item9 Item6 8
## 70 Item10 Item6 10
## 71 Item11 Item6 12
## 72 Item12 Item6 4
## 80 Item8 Item7 4
## 81 Item9 Item7 10
## 82 Item10 Item7 9
## 83 Item11 Item7 8
## 84 Item12 Item7 6
## 93 Item9 Item8 4
## 94 Item10 Item8 5
## 95 Item11 Item8 5
## 96 Item12 Item8 4
## 106 Item10 Item9 9
## 107 Item11 Item9 8
## 108 Item12 Item9 6
## 119 Item11 Item10 12
## 120 Item12 Item10 5
## 132 Item12 Item11 4
Μέσο πλήθος κοινών
λέξεων και μέση τιμή του δείκτη ομοιότητας μεταξύ όλων των διαφορετικών
ζευγών (συνάρτηση mean_common_words)
Παράδειγμα
# Mean number of common words over all distinct item pairs.
mean_common_words(GOHAI.items.en, type = "words")
## [1] 6.5
# Mean value of the similarity index over all distinct item pairs.
mean_common_words(GOHAI.items.en, type = "index")
## [1] 0.407
Συσχέτιση μεταξύ των
αποκρίσεων
# Correlation table for the four item columns of my.data.frame.
my_cor_table(
  my.data.frame,
  c("Item1", "Item2", "Item3", "Item4")
)
Pearson Correlation Coefficients (*:p<.05, **:p<.01, ***:p<.001)

|       | Item1 | Item2 | Item3 | Item4 |
|-------|-------|-------|-------|-------|
| Item1 | -     | .442  | .028  | .040  |
| Item2 | .442  | -     | .109  | .202  |
| Item3 | .028  | .109  | -     | -.507 |
| Item4 | .040  | .202  | -.507 | -     |
Εσωτερική αξιοπιστία
της κλίμακας (συνάρτηση my_reliability)
# Internal-consistency indices for a scale named "MyFactor" built from the
# four item columns of my.data.frame.
# NOTE(review): the recorded output below reports omega/omega2/omega3/avevar
# values in the hundreds; such indices are normally expected to lie between
# 0 and 1 - verify the result of my_reliability for this small data set.
my_reliability(
  factorname = "MyFactor",
  variables = c("Item1", "Item2", "Item3", "Item4"),
  data = my.data.frame
)
##
## ΔΕΙΚΤΕΣ ΑΞΙΟΠΙΣΤΙΑΣ
##
## MyFactor
## alpha 0.242
## omega 335.743
## omega2 335.743
## omega3 388.718
## avevar 470.070
Ανάλυση δομής
(συναρτήσεις iclust, alpha, omega)
Πηγή: https://cran.r-project.org/web/packages/psychTools/vignettes/factor.pdf
# Item cluster analysis (ICLUST) of the items in my.data.frame.
iclust(r.mat = my.data.frame)
## ICLUST (Item Cluster Analysis)
## Call: iclust(r.mat = my.data.frame)
##
## Purified Alpha:
## [1] 0.46
##
## G6* reliability:
## [1] 0.67
##
## Original Beta:
## [1] 0.068
##
## Cluster size:
## [1] 4
##
## Item by Cluster Structure matrix:
## [,1]
## Item1 0.36
## Item2 0.45
## Item3 -0.38
## Item4 0.60
##
## With Sums of squares of:
## [1] 0.84
##
## Purified scale intercorrelations
## reliabilities on diagonal
## correlations corrected for attenuation above diagonal:
## [,1]
## [1,] 0.46
##
## Cluster fit = 0.38 Pattern fit = 0.88 RMSR = 0.22
# Reliability (alpha) analysis of the items in my.data.frame.
# The function is called through the psych namespace on purpose: ggplot2
# (also in the list of required packages) exports a function named alpha(),
# so the unqualified name can resolve to the wrong function depending on
# which package was attached last.
psych::alpha(my.data.frame)
## Some items ( Item3 ) were negatively correlated with the first principal component and
## probably should be reversed.
## To do this, run the function again with the 'check.keys=TRUE' option
##
## Reliability analysis
## Call: psych::alpha(x = my.data.frame)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.24 0.18 0.38 0.052 0.22 0.3 2.3 0.6 0.074
##
## 95% confidence boundaries
## lower alpha upper
## Feldt -0.83 0.24 0.76
## Duhachek -0.34 0.24 0.82
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## Item1 -0.24 -0.22 0.13 -0.065 -0.18 0.61 0.148 0.109
## Item2 -0.23 -0.62 -0.17 -0.146 -0.38 0.47 0.097 0.028
## Item3 0.43 0.47 0.42 0.228 0.89 0.24 0.041 0.202
## Item4 0.38 0.42 0.38 0.193 0.72 0.28 0.048 0.109
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## Item1 12 0.84 0.70 0.538 0.308 2.8 1.60
## Item2 12 0.74 0.81 0.781 0.511 1.2 0.83
## Item3 12 0.32 0.29 -0.048 -0.099 3.5 1.00
## Item4 12 0.18 0.34 0.048 -0.115 1.8 0.72
##
## Non missing response frequency for each item
## 0 1 2 3 4 5 miss
## Item1 0.08 0.17 0.17 0.25 0.17 0.17 0
## Item2 0.25 0.33 0.42 0.00 0.00 0.00 0
## Item3 0.00 0.00 0.17 0.33 0.33 0.17 0
## Item4 0.00 0.33 0.50 0.17 0.00 0.00 0
# Omega analysis (hierarchical and total omega) of the items in my.data.frame.
omega(m = my.data.frame)
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.18
## G.6: 0.38
## Omega Hierarchical: 0.26
## Omega H asymptotic: 0.39
## Omega Total 0.65
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* F3* h2 u2 p2
## Item1 0.29 0.50 -0.28 0.42 0.58 0.21
## Item2 0.52 0.63 0.68 0.32 0.39
## Item3 0.75 0.21 0.62 0.38 0.01
## Item4 0.20 -0.77 0.67 0.33 0.06
##
## With Sums of squares of:
## g F1* F2* F3*
## 0.40 1.14 0.68 0.17
##
## general/max 0.35 max/min = 6.7
## mean percent general = 0.17 with sd = 0.17 and cv of 1.02
## Explained Common Variance of the general factor = 0.17
##
## The degrees of freedom are -3 and the fit is 0
## The number of observations was 12 with Chi Square = 0 with prob < NA
## The root mean square of the residuals is 0
## The df corrected root mean square of the residuals is NA
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 2 and the fit is 0.49
## The number of observations was 12 with Chi Square = 4 with prob < 0.14
## The root mean square of the residuals is 0.25
## The df corrected root mean square of the residuals is 0.43
##
## RMSEA index = 0.276 and the 10 % confidence intervals are 0 0.736
## BIC = -0.97
##
## Measures of factor score adequacy
## g F1* F2* F3*
## Correlation of scores with factors 0.54 0.87 0.68 0.53
## Multiple R square of scores with factors 0.29 0.76 0.47 0.28
## Minimum correlation of factor score estimates -0.42 0.52 -0.07 -0.45
##
## Total, General and Subset omega for each subset
## g F1* F2* F3*
## Omega total for total scores and subscales 0.65 0.08 0.68 NA
## Omega general for total scores and subscales 0.26 0.08 0.23 NA
## Omega group for total scores and subscales 0.28 0.00 0.45 NA
Αναγνώριση
Χρονοσειράς
# Simulate a series of 50 values with ma.sim (mu = 2, theta = -0.4) ...
x <- ma.sim(mu = 2, theta = -0.4, number = 50)
# ... then inspect it: lag plots for lags 1 to 9, the series itself,
# and its autocorrelation plot.
my_lag_plot_ts(x, lag.start = 1, lag.end = 9)
my_plot_ts(x)
my_plot_ts_acf(x)
Προσομοίωση κατανομών
χ² και Student’s t (για εκπαιδευτικούς λόγους)
Προσομοίωση
κατανομής Student t(n)
# Plot for a t distribution with 15 degrees of freedom and an observed
# test statistic of 1.3.
my_plot_t_dist(
  df = 15,
  t.test.statistic = 1.3
)
Προσομοίωση
κατανομής χ²(n)
# Plot for a chi-square distribution with 6 degrees of freedom and an
# observed test statistic of 4.332.
my_plot_chi_square_dist(
  df = 6,
  x2.statistic = 4.332
)
Προσομοίωση Δοκιμασιών
χ² και t - test (για εκπαιδευτικούς λόγους)
Προσομοίωση one
sample t-test
# Inputs handed to the one-sample t-test simulation, in call order.
mu <- 500
sdp <- 3
N <- 30
simulate_one_sample_t_test(mu, sdp, N)
Προσομοίωση
δοκιμασίας ομοιογένειας χ²
# Sample size used by the simulation.
N <- 100
# Hypothesised category proportions. They must form a probability
# distribution, i.e. sum to 1; the previous vector c(1/2, 1/4, 1/2) summed
# to 1.25.
# NOTE(review): c(1/2, 1/4, 1/4) is assumed to be the intended distribution -
# confirm against the teaching material.
Hypothesis <- c(1/2, 1/4, 1/4)
simulate_x2_homogeneity_test(Hypothesis, N)
Αποθήκευση
διαγράμματος σε αρχείο
Ένα γράφημα τελικά βρίσκει τη θέση του σε μία σελίδα της εργασίας.
Μία συνηθισμένη απαίτηση των επιστημονικών περιοδικών είναι τα
διαγράμματα να έχουν μέγεθος 12×8 cm με ανάλυση τουλάχιστον 600 dpi. Η
εξαγωγή ενός γραφήματος σύμφωνα με αυτές τις προδιαγραφές μπορεί να
γίνει απλά εκτελώντας την εντολή png πριν τη δημιουργία του γραφήματος
και την εντολή dev.off μετά. Η χρήση της εντολής png παρουσιάζεται στο
παρακάτω ενδεικτικό παράδειγμα.
# Output file for the exported figure.
filename.for.plot <- "myplot.png"
# Open a PNG device of 120 mm x 80 mm at 600 dpi, draw the plot into it,
# then close the device so the file is written.
png(
  filename = filename.for.plot,
  width = 120,
  height = 80,
  units = "mm",
  res = 600
)
plot_scatter(my.data.frame, agedata, examdata)
dev.off()
Η εκτέλεση του παραπάνω κώδικα θα δημιουργήσει το διάγραμμα και θα το
αποθηκεύσει στο αρχείο myplot.png, από το οποίο μπορεί να μεταφερθεί
στο έγγραφο επιλέγοντας στο MS Word ή στο LibreOffice Writer
Εισαγωγή -> Εικόνα.
Αποθήκευση dataframe
σε αρχείο Excel
Η εξαγωγή ενός dataframe σε αρχείο Excel μπορεί να γίνει με τις
παρακάτω εντολές:
# Export the data frame my.df to an Excel workbook using writexl.
library(writexl)
write_xlsx(x = my.df, path = "file_name.xlsx")
Αποθήκευση dataframe
σε αρχείο SPSS
Η εξαγωγή ενός dataframe σε αρχείο SPSS μπορεί να γίνει με τις
παρακάτω εντολές:
# Export the data frame my.df to an SPSS (.sav) file.
# haven::write_sav is the SPSS writer; the previous call used write_xlsx,
# which belongs to writexl (not loaded here) and writes Excel content, so it
# could not produce a valid .sav file.
library(haven)
write_sav(my.df, 'file_name.sav')