Έκδοση: 12 / 05 / 2024

Σημαντική Σημείωση
Όλες οι συναρτήσεις που χρησιμοποιούνται βρίσκονται στο αρχείο της γλώσσας R: MyRFunctions.R.

Για να γίνουν διαθέσιμες για χρήση αρκεί να φορτωθούν στο περιβάλλον της R, εκτελώντας την εντολή

source("https://utopia.duth.gr/epdiaman/files/kedivim/MyRFunctions.R")

(Στην περίπτωση όπου γίνει τοπική λήψη του αρχείου MyRFunctions.R, πρέπει να αλλάξει ανάλογα και η διεύθυνση του αρχείου.)

1 Διαχείριση δεδομένων

1.1 Μετατροπή όλων των στηλών με λίγες τιμές σε παράγοντες (συνάρτηση my_create_factors)

Παράδειγμα

success = c(1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1)
gender = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0)
my.data.frame = data.frame(success, gender)
summary(my.data.frame)
##     success         gender      
##  Min.   :0.00   Min.   :0.0000  
##  1st Qu.:0.75   1st Qu.:0.0000  
##  Median :1.00   Median :0.0000  
##  Mean   :0.75   Mean   :0.4167  
##  3rd Qu.:1.00   3rd Qu.:1.0000  
##  Max.   :1.00   Max.   :1.0000
my.data.frame.2 = my_create_factors(my.data.frame)
summary(my.data.frame.2)
##  success gender
##  0:3     0:7   
##  1:9     1:5

1.2 Ανάκτηση όλων των παραγόντων από ένα dataframe (συνάρτηση get_factors)

Παράδειγμα

exam = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
gender = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("Γ", "Α"))
age = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(exam, gender, age)
print(get_factors(my.data.frame))
## [1] "gender"

1.3 Ανάκτηση ετικέτας μεταβλητής (labels) (συναρτήσεις get_label και get_df_labels)

1.3.1 Ανάκτηση μίας ή περισσοτέρων ετικετών

Παράδειγμα

exam = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
gender = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("Γ", "Α"))
age = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(exam, gender, age)

Hmisc::label(my.data.frame$exam) = 'Βαθμός εξετάσεων'
Hmisc::label(my.data.frame$gender) = 'Φύλο'

print(get_label(my.data.frame[, 'gender']))
## [1] "Φύλο"
print(get_df_labels(my.data.frame, 'gender'))
## [1] "Φύλο"
print(get_df_labels(my.data.frame))
## [1] "Βαθμός εξετάσεων" "Φύλο"             "age"

1.4 Μετατροπή vector ή μεμονωμένου string για εμφάνιση σε html μορφή (συνάρτηση prepare_vector_for_HTML_output)

Παράδειγμα

all.inequalities = c("1 > 0", "3 < 5")
print(prepare_vector_for_HTML_output(all.inequalities))
## [1] "1 &#62; 0" "3 &#60; 5"

1.5 Μετατροπή των labels ενός dataframe για εμφάνιση σε html μορφή (συνάρτηση prepare_data_for_HTML_output)

Παράδειγμα

# Example data: exam scores plus an age-category factor whose second level
# contains ">", a character that has to be escaped for HTML output.
exam = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
agecat = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("0 - 35", ">35"))
my.data.frame = data.frame(exam, agecat)
print(my.data.frame$agecat)
##  [1] 0 - 35 0 - 35 >35    0 - 35 >35    0 - 35 >35    0 - 35 >35    0 - 35
## [11] >35    0 - 35
## Levels: 0 - 35 >35
Hmisc::label(my.data.frame$exam) = 'Βαθμός εξετάσεων'
Hmisc::label(my.data.frame$agecat) = 'Ηλικιακή κατηγορία'

# Convert the data frame built in this example (my.data.frame). Passing the
# unrelated my.data.frame.2 from section 1.1 would yield an object without an
# agecat column, so the print below would show NULL.
my.data.frame.2 = prepare_data_for_HTML_output(my.data.frame)
print(my.data.frame.2$agecat)
## NOTE(review): regenerate this output by re-running the example; presumably
## the ">35" level is shown HTML-escaped (cf. "&#62;" in section 1.4) - confirm.

2 Descriptive Statistics

2.1 Καταμέτρηση Παρατηρήσεων (συνάρτηση my_case_report)

Παράδειγμα

sample.data = c(1, NA, 0, 4, 6, 1, NA, 1, 1, 0, 1, 1)
my_case_report(sample.data)
## $number.of.NA
## [1] 2
## 
## $valid.cases
## [1] 10
## 
## $total.cases
## [1] 12
## 
## $report.str
## [1] "12 (2 NA, 10 valid)"

2.2 Πίνακας συχνοτήτων (συνάρτηση fre)

library(expss)  
colors1 = c("Κόκκινο", "Μπλε", "Πράσινο", "Κόκκινο", "Μπλε", "Κόκκινο")
htmlTable(fre(colors1))
colors1  Count   Valid percent   Percent   Responses, %   Cumulative responses, % 
 Κόκκινο  3 50.0 50.0 50.0 50.0
 Μπλε  2 33.3 33.3 33.3 83.3
 Πράσινο  1 16.7 16.7 16.7 100.0
 #Total  6 100 100 100
 <NA>  0 0.0

2.3 Πίνακας συχνοτήτων πολλών μεταβλητών (συνάρτηση my_frequency_table)

Παράδειγμα

success = factor(c(1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1), levels = c(0, 1), labels = c("Αποτυχία", "Επιτυχία"))
gender = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας"))
my.data.frame = data.frame(success, gender)

Hmisc::label(my.data.frame$success) = 'Αποτέλεσμα'
Hmisc::label(my.data.frame$gender) = 'Φύλο'

my_frequency_table(my.data.frame, c('success', 'gender'))
Αποτέλεσμα (success)
Value  Count   Valid percent   Percent   Responses, %   Cumulative responses, % 
 Αποτυχία  3 25 25 25 25
 Επιτυχία  9 75 75 75 100
 #Total  12 100 100 100
 <NA>  0 0
Φύλο (gender)
Value  Count   Valid percent   Percent   Responses, %   Cumulative responses, % 
 Γυναίκα  7 58.3 58.3 58.3 58.3
 Άνδρας  5 41.7 41.7 41.7 100.0
 #Total  12 100 100 100
 <NA>  0 0.0

2.4 Πίνακας συμπτώσεων 2 ή περισσοτέρων μεταβλητών (συνάρτηση my_contigency_table)

Παράδειγμα

success = factor(c(1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1), levels = c(0, 1), labels = c("Αποτυχία", "Επιτυχία"))
gender = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας"))
residence = factor(c(1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1), levels = c(0, 1), labels = c("Μικρή πόλη", "Μεγάλη πόλη"))
my.data.frame = data.frame(success, gender, residence)

my_contigency_table(my.data.frame, c('gender', 'success'))
Cross tabulation of gender, success
success  
Αποτυχία Επιτυχία   Sum
gender
  Γυναίκα 1 6   7
  Άνδρας 2 3   5
  Sum 3 9   12
my_contigency_table(my.data.frame, c('residence', 'success', 'gender'))
Cross tabulation of residence, success, gender
success_gender  
Αποτυχία_Γυναίκα Αποτυχία_Άνδρας Επιτυχία_Γυναίκα Επιτυχία_Άνδρας   Sum
residence
  Μικρή πόλη 0 0 2 1   3
  Μεγάλη πόλη 1 2 4 2   9
  Sum 1 2 6 3   12
my_contigency_table(my.data.frame, c('residence', 'success', 'gender'), row.vars = c(1, 3))
Cross tabulation of residence, success, gender
success  
Αποτυχία Επιτυχία   Sum
residence_gender
  Μικρή πόλη_Γυναίκα 0 2   2
  Μικρή πόλη_Άνδρας 0 1   1
  Μεγάλη πόλη_Γυναίκα 1 4   5
  Μεγάλη πόλη_Άνδρας 2 2   4
  Sum 3 9   12

2.5 Mode (συνάρτηση smode)

# Statistical mode(s) of a vector.
#
# Tabulates `x` with table() and keeps every value whose frequency equals the
# highest frequency, so ties are all reported.
#
# @param x A vector accepted by table().
# @return A list with two elements:
#   themodes - the most frequent value(s), as character strings taken from
#              the names of the frequency table;
#   modeval  - how many times each of them occurs (numeric).
#
# Source: https://stat.ethz.ch/pipermail/r-help/2011-March/273569.html
smode <- function(x) {
  counts <- table(x)
  top <- counts[counts == max(counts)]
  list(
    themodes = names(top),
    # All retained entries share the same count, so the first is representative.
    modeval = as.numeric(top[1])
  )
}

Παράδειγμα

score = c(12, 14, 17, 13, 19, 28, 20, 9, 3, 6, 5, 11, 12, 17, 16, 8, 6, 2) 
smode(score)
## $themodes
## [1] "6"  "12" "17"
## 
## $modeval
## [1] 2

2.6 Mean & SD (συνάρτηση my_mean_sd)

Παράδειγμα

score = c(12, 14, 17, 13, 19, 28, 20, 9, 3, 6, 5, 11, 12, 17, 16, 8, 6, 2) 
my_mean_sd(score)
## [1] "M = 12.1 (6.72), 95% C.I. 9.01 - 15.2"

2.7 Explore (συνάρτηση my_explore_vars)

Παράδειγμα

exam = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
gender = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("Γ", "Α"))
age = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(exam, gender, age)

Hmisc::label(my.data.frame$exam) = 'Βαθμός εξετάσεων'
Hmisc::label(my.data.frame$gender) = 'Φύλο'
Hmisc::label(my.data.frame$age) = 'Ηλικία'

my_explore_vars(my.data.frame, c("exam", "age"))
Descriptive statistics of Βαθμός εξετάσεων, Ηλικία
Variable N Missing Valid M (SD) CI95
1 Βαθμός εξετάσεων 12 0 12 66.7 (21.7) 52.9 - 80.4
2 Ηλικία 12 0 12 23.7 (6.75) 19.4 - 28

2.8 Explore by group (συνάρτηση my_explore_vars_by_group)

Παράδειγμα

examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
genderdata = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0)
genderdata = factor(genderdata, levels = c(0, 1), labels = c("Γ", "Α"))
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(Βαθμός = examdata, Φύλο = genderdata, Ηλικία = agedata)

my_explore_vars_by_group(my.data.frame, 'Βαθμός', 'Φύλο', type = 'all')
Descriptive Statistics of Βαθμός among levels of Φύλο
Group N M (SD) 95% C.I. Median Interquartile Range
1 7 80.7 (14.6) 67.3 - 94.2 80 25.0
2 5 47 (12) 32 - 62 50 15.0
Total 12 66.7 (21.7) 52.9 - 80.4 65 28.8
Tukeys HSD Post Hoc Test for variable: Βαθμός
The Tukey HSD is not a Post Hoc Test per se. It may provide valuable results as an independent test as well.
Difference Lower Upper p
2-1 -33.714 -51.466 -15.962 0.002

3 Έλεγχος κανονικότητας (συνάρτηση my_check_normality_of_vector)

Παράδειγμα

score = c(12, 14, 17, 13, 19, 28, 20, 9, 3, 6, 5, 11, 12, 17, 16, 8, 6, 2) 
my_check_normality_of_vector(score)
Normality statistics
Statistic Description
Skewness & Kurtosis
  Skewness 0.483 Normal distribution has skew = 0. For a unimodal distribution, negative skew commonly indicates that the tail is on the left side of the distribution, and positive skew indicates that the tail is on the right.
  Kurtosis 2.88 Normal distribution has kurtosis = 3. Values over 3 indicates a platykurtic distribution and values less than 3 indicates a leptokurtic distribution.
Normality Tests. Η0: This sample is from a normal distribution vs Η1: Not the Η0.
  Shapiro–Wilk W = 0.968, p = 0.753 This test is more appropriate method for small sample sizes (<50 samples) although it can also be handling on larger sample size.
  Lilliefors D = 0.096, p = 0.931 The Lilliefors test uses the same calculations as the Kolmogorov-Smirnov test, but it is more conservative in the sense that the Lilliefors Test is less likely to show that data is normally distributed.

3.1 Έλεγχος κανονικότητας ανά υποομάδα (συνάρτηση my_check_normality_of_column_by_group)

Παράδειγμα

examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
genderdata = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0)
genderdata = factor(genderdata, levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας"))
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(Βαθμός = examdata, Φύλο = genderdata, Ηλικία = agedata)
my_check_normality_of_column_by_group(my.data.frame$Ηλικία, my.data.frame$Φύλο)
Γυναίκα
Normality statistics
Statistic Description
Skewness & Kurtosis
  Skewness -0.473 Normal distribution has skew = 0. For a unimodal distribution, negative skew commonly indicates that the tail is on the left side of the distribution, and positive skew indicates that the tail is on the right.
  Kurtosis 2.675 Normal distribution has kurtosis = 3. Values over 3 indicates a platykurtic distribution and values less than 3 indicates a leptokurtic distribution.
Normality Tests. Η0: This sample is from a normal distribution vs Η1: Not the Η0.
  Shapiro–Wilk W = 0.959, p = 0.811 This test is more appropriate method for small sample sizes (<50 samples) although it can also be handling on larger sample size.
  Lilliefors D = 0.214, p = 0.428 The Lilliefors test uses the same calculations as the Kolmogorov-Smirnov test, but it is more conservative in the sense that the Lilliefors Test is less likely to show that data is normally distributed.
Άνδρας
Normality statistics
Statistic Description
Skewness & Kurtosis
  Skewness 0.898 Normal distribution has skew = 0. For a unimodal distribution, negative skew commonly indicates that the tail is on the left side of the distribution, and positive skew indicates that the tail is on the right.
  Kurtosis 2.505 Normal distribution has kurtosis = 3. Values over 3 indicates a platykurtic distribution and values less than 3 indicates a leptokurtic distribution.
Normality Tests. Η0: This sample is from a normal distribution vs Η1: Not the Η0.
  Shapiro–Wilk W = 0.903, p = 0.427 This test is more appropriate method for small sample sizes (<50 samples) although it can also be handling on larger sample size.
  Lilliefors D = 0.241, p = 0.441 The Lilliefors test uses the same calculations as the Kolmogorov-Smirnov test, but it is more conservative in the sense that the Lilliefors Test is less likely to show that data is normally distributed.

4 Έλεγχος ομοιογένειας (ίση διακύμανση μίας μεταβλητής μεταξύ των επιπέδων ενός παράγοντα) (συνάρτηση my_check_homogeneity_of_column)

Παράδειγμα

examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
genderdata = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0)
genderdata = factor(genderdata, levels = c(0, 1), labels = c("Γ", "Α"))
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(examdata, genderdata, agedata)

library(Hmisc)
var.labels = c(examdata = "Βαθμός Εξέτασης", agedata = 'Ηλικία', genderdata = 'Φύλο')
label(my.data.frame) = as.list(var.labels[match(names(my.data.frame), names(var.labels))])

my_check_homogeneity_of_column(my.data.frame, 'examdata', 'genderdata')
Standard Deviation of Βαθμός Εξέτασης among levels of Φύλο
Level Γ Α
Group Size 7 5
SD 14.6 12.0
Levene test of variance equality (homogeneity) of Βαθμός Εξέτασης over Φύλο
F(1, 10) = 0.622, p = 0.448. Homogeneity hypothesis is confirmed.
Bartlett’s test of variance equality (homogeneity) of Βαθμός Εξέτασης over Φύλο
If you have strong evidence that your data do in fact come from a normal, or nearly normal, distribution, then Bartlett’s test has better performance.
c2(1) = 0.152, p = 0.697. Homogeneity hypothesis is confirmed.

5 Correlation (συνάρτηση my_cor_table)

Παράδειγμα

exam = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
test = c(55, 70, 68, 50, 53, 87, 45, 92, 56, 99, 35, 99)
gender = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας"))
age = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)

my.data.frame = data.frame(exam, test, gender, age)

Hmisc::label(my.data.frame$exam) = 'Βαθμός εξετάσεων'
Hmisc::label(my.data.frame$test) = 'Βαθμός προόδου'
Hmisc::label(my.data.frame$gender) = 'Φύλο'
Hmisc::label(my.data.frame$age) = 'Ηλικία'

# Πίνακες 
my_cor_table(my.data.frame, c('exam', 'test', 'age'))
Pearson Correlation Coefficients (*:p<.05, **:p<.01, ***:p<.001)
Βαθμός εξετάσεων Βαθμός προόδου Ηλικία
Βαθμός εξετάσεων - .929*** .859***
Βαθμός προόδου .929*** - .841***
Ηλικία .859*** .841*** -
# Πίνακες 2: Δείξε μόνο τις συσχετίσεις που είναι μεγαλύτερες από 0,9
my_cor_table_long(my.data.frame, c('exam', 'test', 'age'), dontshowbelow = 0.9)
Correlations
Βαθμός εξετάσεων Βαθμός προόδου Ηλικία
Βαθμός εξετάσεων 1 0.929
Βαθμός προόδου 0.929 1
Ηλικία 1
Observations
Βαθμός εξετάσεων Βαθμός προόδου Ηλικία
Βαθμός εξετάσεων 12 12 12
Βαθμός προόδου 12 12 12
Ηλικία 12 12 12
Significances
Βαθμός εξετάσεων Βαθμός προόδου Ηλικία
Βαθμός εξετάσεων 0 0
Βαθμός προόδου 0 0.001
Ηλικία 0 0.001
# Πίνακες 3: Δείξε μόνο τις συσχετίσεις που είναι στατιστικώς σημαντικές
my_cor_table_long(my.data.frame, c('exam', 'test', 'age'), show.only.significant = TRUE)
Correlations
Βαθμός εξετάσεων Βαθμός προόδου Ηλικία
Βαθμός εξετάσεων 1 0.929 0.859
Βαθμός προόδου 0.929 1 0.841
Ηλικία 0.859 0.841 1
Observations
Βαθμός εξετάσεων Βαθμός προόδου Ηλικία
Βαθμός εξετάσεων 12 12 12
Βαθμός προόδου 12 12 12
Ηλικία 12 12 12
Significances
Βαθμός εξετάσεων Βαθμός προόδου Ηλικία
Βαθμός εξετάσεων 0 0
Βαθμός προόδου 0 0.001
Ηλικία 0 0.001
# Διάγραμμα
my_cor_plot(my.data.frame, c('exam', 'test', 'age'))

5.1 Correlation ανά υποομάδα με υπολογισμό p value και 95% διάστημα εμπιστοσύνης (συνάρτηση my_cortable_by_factor_with_p)

Παράδειγμα

examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
testdata = c(55, 70, 68, 50, 53, 87, 45, 92, 56, 99, 35, 99)
genderdata = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0)
genderdata = factor(genderdata, levels = c(0, 1), labels = c("Γ", "Α"))
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(Εξετάσεις = examdata, Πρόοδος = testdata, Φύλο = genderdata, Ηλικία = agedata)

my_cortable_by_factor_with_p(my.data.frame, c('Εξετάσεις', 'Πρόοδος', 'Ηλικία'), afactor = 'Φύλο')
Correlation of Εξετάσεις, Πρόοδος, Ηλικία across levels of Φύλο
Var1 Var2 Group1 Group2 Corr1 N1 Corr2 N2 p.value lowerCI upperCI
1 Εξετάσεις Πρόοδος Γ Α 0.901 7 0.952 5 0.662 -0.495 0.47
2 Εξετάσεις Ηλικία Γ Α 0.55 7 0.876 5 0.393 -1.23 0.651
3 Πρόοδος Ηλικία Γ Α 0.665 7 0.928 5 0.33 -1.107 0.467

5.2 Μερική συσχέτιση (Partial Correlation) (συνάρτηση pcor.test)

examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
testdata = c(55, 70, 68, 50, 53, 87, 45, 92, 56, 99, 35, 99)
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)

library(ppcor)
ppcor::pcor.test(testdata, examdata, agedata)
##    estimate     p.value statistic  n gp  Method
## 1 0.7476411 0.008160923  3.377373 12  1 pearson
cor(testdata, examdata)
## [1] 0.929472

6 Πολλαπλή δοκιμασία χι - τετράγωνο (συνάρτηση my_chi_square)

Παράδειγμα 1: Εφαρμογή σε πίνακα συχνοτήτων

the.table = matrix(c(10, 11, 5, 8, 8, 6, 8, 6, 15, 25), 2, 5, byrow=TRUE)
rownames(the.table) = c('Αποτυχία', 'Επιτυχία')
colnames(the.table) = c('Πολύ αρνητική', 'Αρνητική', 'Ουδέτερη', 'Θετική',  'Πολύ θετική')

my_chi_square(the.table)

Test Var1 - Var2.
x2(4) = 9.57, p = 0.048. Result: DEPENDEND VARIABLES.

Observed Frequencies between combinations of Var1 - Var2
The statistic x2 reflects the overall difference between observed and expected frequencies.
Πολύ αρνητική Αρνητική Ουδέτερη Θετική Πολύ θετική Sum
Αποτυχία 10 11 5 8 8 42
Επιτυχία 6 8 6 15 25 60
Sum 16 19 11 23 33 102
Expected Frequencies
Less Than 5: 1 (10%)
According to Moore & McCabe, no more than 20% of the expected counts should be less than 5. Some expected counts can be <5, provided none <1, and 80% of the expected counts should be equal to or greater than 5.
Πολύ αρνητική Αρνητική Ουδέτερη Θετική Πολύ θετική Sum
Αποτυχία 6.6 7.8 4.5 9.5 13.6 42
Επιτυχία 9.4 11.2 6.5 13.5 19.4 60
Sum 16 19 11 23 33 102

Παράδειγμα 2: Εφαρμογή σε μεταβλητές dataframe

gender = factor(c(1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1), levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας"))
agecat = factor(c(1, 2, 3, 3, 3, 2, 2, 2, 1, 1, 1, 2), levels = c(1, 2, 3), labels = c("15 - 24", "25 - 34", ">34"))
success = factor(c(0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0), levels = c(0, 1), labels = c("Αποτυχία", "Επιτυχία"))

my.data.frame = data.frame(gender, agecat, success)

Hmisc::label(my.data.frame$success) = 'Επιτυχία στις εξετάσεις'
Hmisc::label(my.data.frame$gender) = 'Φύλο'
Hmisc::label(my.data.frame$agecat) = 'Ηλικιακή κατηγορία'

my_chi_square(my.data.frame, c('gender', 'agecat', 'success'))

Test Φύλο - Ηλικιακή κατηγορία.
x2(2) = 4.32, p = 0.115. Result: Independent Variables.

Observed Frequencies between combinations of Φύλο - Ηλικιακή κατηγορία
The statistic x2 reflects the overall difference between observed and expected frequencies.
15 - 24 25 - 34 >34 Sum
Γυναίκα 0 3 2 5
Άνδρας 4 2 1 7
Sum 4 5 3 12
Expected Frequencies
Less Than 5: 6 (100%)
According to Moore & McCabe, no more than 20% of the expected counts should be less than 5. Some expected counts can be <5, provided none <1, and 80% of the expected counts should be equal to or greater than 5.
15 - 24 25 - 34 >34 Sum
Γυναίκα 1.7 2.1 1.2 5
Άνδρας 2.3 2.9 1.8 7
Sum 4 5 3 12

Test Φύλο - Επιτυχία στις εξετάσεις.
x2(1) = 3.09, p = 0.079. Result: Independent Variables.

Observed Frequencies between combinations of Φύλο - Επιτυχία στις εξετάσεις
The statistic x2 reflects the overall difference between observed and expected frequencies.
Αποτυχία Επιτυχία Sum
Γυναίκα 4 1 5
Άνδρας 2 5 7
Sum 6 6 12
Expected Frequencies
Less Than 5: 4 (100%)
According to Moore & McCabe, no more than 20% of the expected counts should be less than 5. Some expected counts can be <5, provided none <1, and 80% of the expected counts should be equal to or greater than 5.
Αποτυχία Επιτυχία Sum
Γυναίκα 2.5 2.5 5
Άνδρας 3.5 3.5 7
Sum 6 6 12

Test Ηλικιακή κατηγορία - Επιτυχία στις εξετάσεις.
x2(2) = 3.13, p = 0.209. Result: Independent Variables.

Observed Frequencies between combinations of Ηλικιακή κατηγορία - Επιτυχία στις εξετάσεις
The statistic x2 reflects the overall difference between observed and expected frequencies.
Αποτυχία Επιτυχία Sum
15 - 24 1 3 4
25 - 34 4 1 5
>34 1 2 3
Sum 6 6 12
Expected Frequencies
Less Than 5: 6 (100%)
According to Moore & McCabe, no more than 20% of the expected counts should be less than 5. Some expected counts can be <5, provided none <1, and 80% of the expected counts should be equal to or greater than 5.
Αποτυχία Επιτυχία Sum
15 - 24 2 2 4
25 - 34 2.5 2.5 5
>34 1.5 1.5 3
Sum 6 6 12

7 Δοκιμασία t - test

7.1 Δοκιμασία για ένα δείγμα (One sample t - test) (συνάρτηση my_t_test_one_sample)

Παράδειγμα 1: Εφαρμογή σε vector

one.sample.data = c(490, 503, 499, 492, 500, 501, 489, 478, 498, 508)
my_t_test_one_sample(one.sample.data, mu = 500)
M(SD) (496 ± 8.64), N = 10. H0: μ = 500 vs H1: μ ≠ 500. H0 is not rejected (t(9) = 1.538, p = 0.159).

Παράδειγμα 2: Εφαρμογή σε dataframe

examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
testdata = c(55, 70, 68, 50, 53, 87, 45, 92, 56, 99, 35, 99)
my.data.frame = data.frame(Εξετάσεις = examdata, Πρόοδος = testdata)

my_t_test_one_sample(my.data.frame, var = 'Εξετάσεις', mu = 60)
Εξετάσεις (66.7 ± 21.7), N = 12. H0: μ = 60 vs H1: μ ≠ 60. H0 is not rejected (t(11) = 1.066, p = 0.309).
my_t_test_one_sample(my.data.frame, var = 'Πρόοδος', mu = 60)
Πρόοδος (67.4 ± 22), N = 12. H0: μ = 60 vs H1: μ ≠ 60. H0 is not rejected (t(11) = 1.166, p = 0.268).

7.2 Δοκιμασία δύο ανεξάρτητων δειγμάτων (Independent samples t - test) (συνάρτηση my_t_test_independent_samples)

Παράδειγμα

examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
testdata = c(55, 70, 68, 50, 53, 87, 45, 92, 56, 99, 35, 99)
genderdata = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("Γ", "Α"))
locationdata = factor(c(1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0), levels = c(0, 1), labels = c("Χωριό/Κωμόπολη", "Πόλη"))
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(Εξετάσεις = examdata, Πρόοδος = testdata, Φύλο = genderdata, Ηλικία = agedata, Τοποθεσία = locationdata)

my_t_test_independent_samples(my.data.frame, dependent.vars = c('Εξετάσεις', 'Πρόοδος', 'Ηλικία'), group.var = c('Φύλο', 'Τοποθεσία'), print.also.in.console = FALSE)
Εξετάσεις over Φύλο H0: μΓ = μΑ vs H1: μΓ ≠ μΑ. H0 is rejected.
Group Γ (N = 7): 80.7 ± 14.6 vs Group Α (N = 5): 47 ± 12, t(10) = 4.232, p = 0.002.
Equality of variances: F(6, 4) = 1.461, p = 0.744.
Πρόοδος over Φύλο H0: μΓ = μΑ vs H1: μΓ ≠ μΑ. H0 is rejected.
Group Γ (N = 7): 78.9 ± 20.5 vs Group Α (N = 5): 51.4 ± 12.3, t(10) = 2.646, p = 0.024.
Equality of variances: F(6, 4) = 2.77, p = 0.343.
Ηλικία over Φύλο H0: μΓ = μΑ vs H1: μΓ ≠ μΑ. H0 is rejected.
Group Γ (N = 7): 28 ± 5.1 vs Group Α (N = 5): 17.6 ± 2.7, t(10) = 4.127, p = 0.002.
Equality of variances: F(6, 4) = 3.562, p = 0.239.
Εξετάσεις over Τοποθεσία H0: μΠόλη = μΧωριό/Κωμόπολη vs H1: μΠόλη ≠ μΧωριό/Κωμόπολη. H0 is not rejected.
Group Πόλη (N = 6): 61.7 ± 22.9 vs Group Χωριό/Κωμόπολη (N = 6): 71.7 ± 21.1, t(10) = 0.785, p = 0.451.
Equality of variances: F(5, 5) = 1.179, p = 0.861.
Πρόοδος over Τοποθεσία H0: μΠόλη = μΧωριό/Κωμόπολη vs H1: μΠόλη ≠ μΧωριό/Κωμόπολη. H0 is not rejected.
Group Πόλη (N = 6): 63.8 ± 21.3 vs Group Χωριό/Κωμόπολη (N = 6): 71 ± 24.2, t(10) = 0.545, p = 0.598.
Equality of variances: F(5, 5) = 0.774, p = 0.786.
Ηλικία over Τοποθεσία H0: μΠόλη = μΧωριό/Κωμόπολη vs H1: μΠόλη ≠ μΧωριό/Κωμόπολη. H0 is not rejected.
Group Πόλη (N = 6): 24.3 ± 7.69 vs Group Χωριό/Κωμόπολη (N = 6): 23 ± 6.32, t(10) = 0.328, p = 0.75.
Equality of variances: F(5, 5) = 1.477, p = 0.679.

7.3 Δοκιμασία για ζευγαρωτές παρατηρήσεις (Paired samples t - test) (συνάρτηση my_t_test_paired_samples)

Παράδειγμα

weight1 = c(81.2, 76.7, 75.7, 81.2, 71.7, 71.2, 68.5, 89.8, 107.5, 105.7, 99.3, 100.7, 90.3, 105.7, 98.0, 116.6)
weight2 = c(78.0, 73.0, 73.0, 78.5, 69.9, 64.9, 63.5, 87.1, 102.1, 102.5, 97.1, 95.3, 87.5, 102.5, 93.4, 112.9)
weight3 = c(78.4, 72.1, 73.7, 78.5, 69.7, 65.4, 63.3, 85.5, 101.3, 102.2, 95.7, 93.9, 86.5, 102.5, 93.9, 113.9)
my.data.frame = data.frame(weight1, weight2, weight3)
Hmisc::label(my.data.frame$weight1) = '1η Μέτρηση'
Hmisc::label(my.data.frame$weight2) = '2η Μέτρηση'
Hmisc::label(my.data.frame$weight3) = '3η Μέτρηση'

my_t_test_paired_samples(my.data.frame, c('weight1', 'weight2', 'weight3'))
1η Μέτρηση (90 ± 15.2) vs 2η Μέτρηση (86.3 ± 15.2), N = 16 pairs. H0: μ1η Μέτρηση = μ2η Μέτρηση vs H1: μ1η Μέτρηση ≠ μ2η Μέτρηση. H0 is rejected (t(15) = 11.2, p < 0.001).

1η Μέτρηση (90 ± 15.2) vs 3η Μέτρηση (86 ± 15.1), N = 16 pairs. H0: μ1η Μέτρηση = μ3η Μέτρηση vs H1: μ1η Μέτρηση ≠ μ3η Μέτρηση. H0 is rejected (t(15) = 10.9, p < 0.001).

2η Μέτρηση (86.3 ± 15.2) vs 3η Μέτρηση (86 ± 15.1), N = 16 pairs. H0: μ2η Μέτρηση = μ3η Μέτρηση vs H1: μ2η Μέτρηση ≠ μ3η Μέτρηση. H0 is not rejected (t(15) = 1.45, p = 0.168).

8 Γραμμική παλινδρόμηση (Linear Regression) (συνάρτηση my_lm)

Παράδειγμα

test = c(5.5, 6, 7, 5, 8, 2, 9, 10, 2, 3, 4, 6.5, 8.5, 1)
exam = c(6.5, 6, 8, 7.5, 7, 4, 8, 10, 1, 5, 5, 6, 9, 5)
my.df = data.frame(test, exam)
Hmisc::label(my.df$test) = 'Πρόοδος'
Hmisc::label(my.df$exam) = 'Τελικές εξετάσεις'

my.model = my_lm(my.df, exam ~ test)
my.model$htmlreport
Linear regression results for Τελικές εξετάσεις
Model: exam ~ const + test
  95% C.I.
B SE t p   Lower Upper
Constant 2.501 0.747 3.348 0.006   0.873 4.129
Πρόοδος 0.684 0.121 5.651 <0.001   0.42 0.947
Coefficient of determination
R2 = 0.727
Homoscedasticity Breusch–Pagan test
x2(1) = 2.905, p = 0.088. Homoscedasticity assumption is not rejected. Model seems to be valid.
Normality tests of models’ residuals
Statistic Description
Skewness & Kurtosis
  Skewness -0.704 Normal distribution has skew = 0. For a unimodal distribution, negative skew commonly indicates that the tail is on the left side of the distribution, and positive skew indicates that the tail is on the right.
  Kurtosis 3.619 Normal distribution has kurtosis = 3. Values over 3 indicates a platykurtic distribution and values less than 3 indicates a leptokurtic distribution.
Normality Tests. Η0: This sample is from a normal distribution vs Η1: Not the Η0.
  Shapiro–Wilk W = 0.943, p = 0.455 This test is more appropriate method for small sample sizes (<50 samples) although it can also be handling on larger sample size.
  Lilliefors D = 0.136, p = 0.69 The Lilliefors test uses the same calculations as the Kolmogorov-Smirnov test, but it is more conservative in the sense that the Lilliefors Test is less likely to show that data is normally distributed.

9 ANOVA (συνάρτηση my_ANOVA)

Παράδειγμα

examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
genderdata = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0)
genderdata = factor(genderdata, levels = c(0, 1), labels = c("Γ", "Α"))
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(examdata, genderdata, agedata)

library(Hmisc)
var.labels = c(examdata = "Βαθμός Εξέτασης", agedata = 'Ηλικία', genderdata = 'Φύλο')
label(my.data.frame) = as.list(var.labels[match(names(my.data.frame), names(var.labels))])

# Εκτέλεση με δήλωση όλων των μεταβλητών για post - hoc έλεγχο
all.ANOVA.output = my_ANOVA(data = my.data.frame, model = examdata ~ agedata + genderdata)

all.ANOVA.output$htmlTable
ANOVA Results for Βαθμός Εξέτασης
SS df F p
(Intercept) 159.721 1 1.244 0.294
agedata 695.902 1 5.42 0.045
genderdata 198.218 1 1.544 0.245
Residuals 1155.527 9
LSD Test for variable: Φύλο
Fisher’s LSD is a series of pairwise t-tests, with each test using the mean squared error from the significant ANOVA as its pooled variance estimate (and naturally taking the associated degrees of freedom). It is a valid test, in case where the ANOVA is significant.
N Βαθμός Εξέτασης Groups
Α 5 47 b
Γ 7 80.714 a
Tukeys HSD Post Hoc Test for variable: Φύλο
The Tukey HSD is not a Post Hoc Test per se. It may provide valuable results even if ANOVA is not significant.
Difference Lower Upper p
Α-Γ -33.714 -51.466 -15.962 0.002
Standard Deviation of Βαθμός Εξέτασης among levels of all levels of genderdata.
Level Γ Α
Group Size 7 5
SD 14.6 12.0
Levene test of variance equality (homogeneity) of Βαθμός Εξέτασης over all levels of genderdata.
F(1, 10) = 0.622, p = 0.448. Homogeneity hypothesis is confirmed.
Bartlett’s test of variance equality (homogeneity) of Βαθμός Εξέτασης over all levels of genderdata.
If you have strong evidence that your data do in fact come from a normal, or nearly normal, distribution, then Bartlett’s test has better performance.
c2(1) = 0.152, p = 0.697. Homogeneity hypothesis is confirmed.
Pearson Correlation Coefficients (*:p<.05, **:p<.01, ***:p<.001)
Βαθμός Εξέτασης Ηλικία
Βαθμός Εξέτασης - .859***
Ηλικία .859*** -
Normality test for model residuals.
If the main goal of an ANOVA is to see whether or not certain effects are significant, then the assumption of normality of the residuals is only required for small samples, thanks to the central limit theorem. With sample sizes of a few hundred participants even extreme violations of the normality assumptions are unproblematic. So mild violations of this assumptions are usually no problem with sample sizes exceeding 30.
Statistic Description
Skewness & Kurtosis
  Skewness -0.564 Normal distribution has skew = 0. For a unimodal distribution, negative skew commonly indicates that the tail is on the left side of the distribution, and positive skew indicates that the tail is on the right.
  Kurtosis 2.149 Normal distribution has kurtosis = 3. Values over 3 indicates a platykurtic distribution and values less than 3 indicates a leptokurtic distribution.
Normality Tests. Η0: This sample is from a normal distribution vs Η1: Not the Η0.
  Shapiro–Wilk W = 0.898, p = 0.147 This test is more appropriate method for small sample sizes (<50 samples) although it can also be handling on larger sample size.
  Lilliefors D = 0.241, p = 0.053 The Lilliefors test uses the same calculations as the Kolmogorov-Smirnov test, but it is more conservative in the sense that the Lilliefors Test is less likely to show that data is normally distributed.

10 Repeated Measures ANOVA (συνάρτηση my_Repeated_ANOVA)

Παράδειγμα

gender = c(0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
gender = factor(gender, levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας"))
weight1 = c(81.2, 76.7, 75.7, 81.2, 71.7, 71.2, 68.5, 89.8, 107.5, 105.7, 99.3, 100.7, 90.3, 105.7, 98.0, 116.6)
weight2 = c(78.0, 73.0, 73.0, 78.5, 69.9, 64.9, 63.5, 87.1, 102.1, 102.5, 97.1, 95.3, 87.5, 102.5, 93.4, 112.9)
weight3 = c(78.4, 72.1, 73.7, 78.5, 69.7, 65.4, 63.3, 85.5, 101.3, 102.2, 95.7, 93.9, 86.5, 102.5, 93.9, 113.9)
dataF = data.frame(gender, weight1, weight2, weight3)

all.necessary.data = my_Repeated_ANOVA(dataF, betweencols = c('gender'), withincols =  c('weight1', 'weight2', 'weight3'))
all.necessary.data$anova.table
Repeated ANOVA table: Greenhouse-Geisser correction applied.
Effect df MSE F p.value
1 gender 1, 14 163.04 49.36 *** <.001
2 time 1.57, 22.05 0.97 99.35 *** <.001
3 gender:time 1.57, 22.05 0.97 0.69 .480
weight1
Normality statistics
Statistic Description
Skewness & Kurtosis
  Skewness 0.157 Normal distribution has skew = 0. For a unimodal distribution, negative skew commonly indicates that the tail is on the left side of the distribution, and positive skew indicates that the tail is on the right.
  Kurtosis 2.832 Normal distribution has kurtosis = 3. Values over 3 indicates a platykurtic distribution and values less than 3 indicates a leptokurtic distribution.
Normality Tests. Η0: This sample is from a normal distribution vs Η1: Not the Η0.
  Shapiro–Wilk W = 0.965, p = 0.756 This test is more appropriate method for small sample sizes (<50 samples) although it can also be handling on larger sample size.
  Lilliefors D = 0.131, p = 0.657 The Lilliefors test uses the same calculations as the Kolmogorov-Smirnov test, but it is more conservative in the sense that the Lilliefors Test is less likely to show that data is normally distributed.
weight2
Normality statistics
Statistic Description
Skewness & Kurtosis
  Skewness 0.211 Normal distribution has skew = 0. For a unimodal distribution, negative skew commonly indicates that the tail is on the left side of the distribution, and positive skew indicates that the tail is on the right.
  Kurtosis 2.555 Normal distribution has kurtosis = 3. Values over 3 indicates a platykurtic distribution and values less than 3 indicates a leptokurtic distribution.
Normality Tests. Η0: This sample is from a normal distribution vs Η1: Not the Η0.
  Shapiro–Wilk W = 0.967, p = 0.787 This test is more appropriate method for small sample sizes (<50 samples) although it can also be handling on larger sample size.
  Lilliefors D = 0.103, p = 0.919 The Lilliefors test uses the same calculations as the Kolmogorov-Smirnov test, but it is more conservative in the sense that the Lilliefors Test is less likely to show that data is normally distributed.
weight3
Normality statistics
Statistic Description
Skewness & Kurtosis
  Skewness 0.302 Normal distribution has skew = 0. For a unimodal distribution, negative skew commonly indicates that the tail is on the left side of the distribution, and positive skew indicates that the tail is on the right.
  Kurtosis 2.802 Normal distribution has kurtosis = 3. Values over 3 indicate a leptokurtic distribution and values less than 3 indicate a platykurtic distribution.
Normality Tests. Η0: This sample is from a normal distribution vs Η1: Not the Η0.
  Shapiro–Wilk W = 0.969, p = 0.821 This test is more appropriate method for small sample sizes (<50 samples) although it can also be handling on larger sample size.
  Lilliefors D = 0.114, p = 0.833 The Lilliefors test uses the same calculations as the Kolmogorov-Smirnov test, but it is more conservative in the sense that the Lilliefors Test is less likely to show that data is normally distributed.
Mauchly’s Test for Sphericity. Tests the hypothesis that all pairs of measurements have equal variance in their difference.
Mauchly’s W p
time 0.621 0.045
gender:time 0.621 0.045

11 Post Hoc Test (LSD, Tukeys) (συναρτήσεις my_post_hoc_LSD_test, my_post_hoc_Tukeys_test)

Παράδειγμα (LSD)

examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
genderdata = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0)
genderdata = factor(genderdata, levels = c(0, 1), labels = c("Γ", "Α"))
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(Βαθμός = examdata, Φύλο = genderdata, Ηλικία = agedata)

my_post_hoc_LSD_test(data = my.data.frame, model = Βαθμός ~ Ηλικία + Φύλο, variable = 'Φύλο')
## [1] "LSD Test"
##     Βαθμός groups
## Γ 80.71429      a
## Α 47.00000      b

Παράδειγμα (Tukey’s 1)

examdata = c(65, 65, 60, 70, 55, 80, 40, 90, 50, 100, 30, 95)
genderdata = c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0)
genderdata = factor(genderdata, levels = c(0, 1), labels = c("Γ", "Α"))
agedata = c(29, 28, 22, 19, 18, 28, 16, 25, 17, 35, 15, 32)
my.data.frame = data.frame(Βαθμός = examdata, Φύλο = genderdata, Ηλικία = agedata)

my_post_hoc_Tukeys_test(data = my.data.frame, model = Βαθμός ~ Φύλο, variable = 'Φύλο')
## [1] "Tukey Test"
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = model, data = data)
## 
## $Φύλο
##          diff       lwr       upr     p adj
## Α-Γ -33.71429 -51.46649 -15.96208 0.0017394

Παράδειγμα (Tukey’s 2)

examdata = c(65, 65, 95, 70, 55, 90, 98, 90, 50, 100, 30, 95)
classdata = factor(c(3, 1, 2, 1, 1, 2, 2, 3, 3, 3, 1, 2), labels = c("Α", "Β", "Γ"))
my.data.frame = data.frame(Βαθμός = examdata, Τάξη = classdata)

library(multcompView)
model=lm(my.data.frame$Βαθμός ~ my.data.frame$Τάξη)
TUKEY <- TukeyHSD(x=aov(model))
TK_data<-round(as.data.frame((TUKEY)[1]), 3)
names(TK_data) = c('Difference', 'Lower', 'Upper', 'p')
htmlTable::htmlTable(TK_data)     
Difference Lower Upper p
Β-Α 39.5 6.257 72.743 0.022
Γ-Α 21.25 -11.993 54.493 0.229
Γ-Β -18.25 -51.493 14.993 0.322
plot(TUKEY , las=2 , col="brown")

12 Reliability (Alpha, Omega) (συνάρτηση my_reliability)

Παράδειγμα

sample.data = data.frame(item1 = c(3, 2, 2, 2, 3, 2, 3, 2, 3, 2, 3, 2, 2, 2, 3, 2, 3, 2, 3, 2), 
                         item2 = c(1, 2, 2, 2, 1, 2, 1, 1, 3, 2, 1, 2, 2, 2, 1, 2, 1, 1, 3, 2), 
                         item3 = c(1, 3, 2, 2, 3, 2, 1, 3, 3, 2, 1, 3, 2, 2, 3, 2, 1, 3, 3, 2))
my_reliability(factorname = 'MyFactor', variables = c("item1", "item2", "item3"), data = sample.data)
## 
## ΔΕΙΚΤΕΣ ΑΞΙΟΠΙΣΤΙΑΣ
## 
##        MyFactor
## alpha     0.000
## omega     0.298
## omega2    0.298
## omega3    0.298
## avevar    0.298

13 Plots

13.1 Bar Plot (Ραβδόγραμμα) (συναρτήσεις my_bar_plot, my_bar_plot2, my_bar_plot_percent)

Παράδειγμα

outcome = c("Όχι", "Όχι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Ναι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Όχι")
my_bar_plot(outcome)

Παράδειγμα

outcome = c("Όχι", "Όχι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Ναι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Όχι")
my_bar_plot2(outcome)

Παράδειγμα

outcome = c("Όχι", "Όχι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Ναι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Όχι")
my_bar_plot_percent(outcome)

13.2 Pie Plot (Κυκλικό διάγραμμα) (συναρτήσεις my_pie_plot, my_pie_3dplot)

Παράδειγμα

outcome = c("Όχι", "Όχι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Ναι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Όχι")
my_pie_plot(outcome)

Παράδειγμα

outcome = c("Όχι", "Όχι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Ναι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Όχι")
my_pie_3dplot(outcome)

13.3 Stack Plot (Ραβδόγραμμα δύο ποιοτικών μεταβλητών) (συνάρτηση my_stack_plot)

Παράδειγμα 1: Εφαρμογή σε πίνακα

the.table = matrix(c(10, 11, 5, 8, 8, 6, 8, 6, 15, 25), 2, 5, byrow=TRUE)
rownames(the.table) = c('Αποτυχία', 'Επιτυχία')
colnames(the.table) = c('Πολύ αρνητική', 'Αρνητική', 'Ουδέτερη', 'Θετική',  'Πολύ θετική')
my_stack_plot(the.table, type = 3, flip.plot = TRUE)

Παράδειγμα 2: Εφαρμογή σε dataframe

library(Hmisc)
sample.data <- tibble(
  success = factor(c(1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1), levels = c(0, 1), labels = c("Αποτυχία", "Επιτυχία")),
  gender = factor(c(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0), levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας")))

label(sample.data$success) = 'Αποτέλεσμα'
label(sample.data$gender) = 'Φύλο'
my_stack_plot(sample.data, 'gender', 'success', type = 1)

my_stack_plot(sample.data, 'gender', 'success', type = 2)

my_stack_plot(sample.data, 'gender', 'success', type = 3, flip.plot = TRUE)

13.4 Συχνότητες εμφάνισης τιμών (συνάρτηση my_tab_fun)

Παράδειγμα

outcome = c("Όχι", "Όχι", "Ίσως", "Ναι", "Όχι", "Ναι", "Ναι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Όχι")
gender = c("Αγόρι", "Κορίτσι", "Κορίτσι", "Κορίτσι", "Αγόρι", "Αγόρι", "Αγόρι", "Αγόρι", "Κορίτσι", "Κορίτσι", "Κορίτσι",  "Αγόρι")
sample.data = data.frame(gender = gender, outcome = outcome)
my_tab_fun(sample.data)
##         Ίσως Ναι Όχι Sum
## gender     6   6   6  18
## outcome    3   5   4  12

13.5 Binary Plot (Ραβδόγραμμα πολλών δίτιμων μεταβλητών) (συνάρτηση my_binary_vars_plot)

Παράδειγμα

data.for.plot = data.frame(Χαρακτηριστικό1 = c(0, 0, 0, 1, 1, 0), Χαρακτηριστικό2 = c(1, 0, 1, 0, 0, 1), Χαρακτηριστικό3 = c(1, 1, 1, 0, 1, 0))
my_tab_fun(data.for.plot)
##                 X0 X1 Sum
## Χαρακτηριστικό1  4  2   6
## Χαρακτηριστικό2  3  3   6
## Χαρακτηριστικό3  2  4   6
my_binary_vars_plot(data.for.plot, xlab = 'Χαρακτηριστικά', labels = c('1o', '2o', '3o'))

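13.6 Ιστόγραμμα (Histogram) (συναρτήσεις my_histFrequency, my_histFrequency_with_normal_curve, my_histPercent, my_hist_multiple, my_hist_multiple_density, my_hist_multiple_density_sidebyside)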

Παράδειγμα

sampledata = c(11.8, 3.6, 16.6, 13.5, 4.8, 8.3, 8.9, 9.1, 7.7, 2.3, 12.1, 6.1, 10.2, 8.0, 11.4, 6.8, 9.6, 19.5, 15.3, 12.3, 8.5, 15.9, 18.7, 11.7, 6.2, 11.2, 10.4, 7.2, 5.5, 14.5 )
my_histFrequency(sampledata, xlab = 'Τιμή')

Παράδειγμα

sampledata = rnorm(100, 0, 1)
my_histFrequency_with_normal_curve(sampledata, xlab = 'Τιμή')

Παράδειγμα

sampledata = rnorm(100, 0, 1)
my_histPercent(sampledata, xlab = 'Τιμή')

Παράδειγμα

value = c(rnorm(1000, 0, 1), rnorm(1000, 2.5, 1))
group = c(rep(1, 1000), rep(2, 1000))
sampledata = data.frame(group = group, value = value)
my_hist_multiple(sampledata$value, sampledata$group, xlab = 'Τιμή')

Παράδειγμα

value = c(rnorm(1000, 0, 1), rnorm(1000, 2.5, 1))
group = c(rep(1, 1000), rep(2, 1000))
sampledata = data.frame(group = group, value = value)
my_hist_multiple_density(sampledata$value, sampledata$group, xlab = 'Τιμή')

Παράδειγμα

value = c(rnorm(1000, 0, 1), rnorm(1000, 2.5, 1))
group = c(rep(1, 1000), rep(2, 1000))
sampledata = data.frame(group = group, value = value)
my_hist_multiple_density_sidebyside(sampledata$value, sampledata$group, xlab = 'Τιμή')

13.7 Διάγραμμα Μέσων Τιμών μίας ποσοτικής μεταβλητής

yvalues = rnorm(1000, 0, 1)
afactor = as.factor(rbinom(1000, 3, 0.2))
data = data.frame(yvalues = yvalues, afactor = afactor)
Hmisc::label(yvalues) = 'Y'
Hmisc::label(afactor) = 'Factor'

gplots::plotmeans(yvalues ~ afactor, data = data, xlab=Hmisc::label(afactor), ylab=Hmisc::label(yvalues))

13.8 Διάγραμμα αλληλεπίδρασης δύο παραγόντων πάνω σε μία συνεχή μεταβλητή (Interaction Plot) (συνάρτηση my.interaction.plot)

# Draws an interaction plot of two factors on a numeric response, using
# stats::interaction.plot with a fixed visual style (points and lines,
# red/blue traces, legend shown).
#
# column.to.describe: numeric response vector.
# afactor1:           factor placed on the x axis.
# afactor2:           trace factor (one line per level).
# legend.label:       title shown above the legend.
# xlab, ylab, main:   axis labels and plot title.
#
# Returns the (invisible, NULL) value of interaction.plot; called for its
# plotting side effect.
my.interaction.plot = function(column.to.describe, afactor1, afactor2, legend.label = '', xlab = '', ylab = '', main = ''){
  # Missing responses are ignored both for the cell means and for the y-axis
  # limits. Without na.rm a single NA would turn ylim into c(NA, NA) and the
  # plot would fail. For complete data the result is unchanged.
  interaction.plot(afactor1, afactor2,
                   column.to.describe,
                   fun = function(values) mean(values, na.rm = TRUE),
                   type = "b", col = c("red", "blue"),
                   legend = TRUE, trace.label = legend.label,
                   lty = c(1, 2), lwd = 2, pch = c(18, 24),
                   xlab = xlab, ylab = ylab,
                   main = main,
                   # the y axis spans the raw observations, not only the cell means
                   ylim = range(column.to.describe, na.rm = TRUE))
}

Παράδειγμα

answer = c("Όχι", "Όχι", "Ίσως", "Ναι", "Όχι", "Ναι", "Ναι", "Ίσως", "Ναι", "Ίσως", "Ναι", "Όχι")
gender = c("Αγόρι", "Κορίτσι", "Κορίτσι", "Κορίτσι", "Αγόρι", "Αγόρι", "Αγόρι", "Αγόρι", "Κορίτσι", "Κορίτσι", "Κορίτσι",  "Αγόρι")
score = c(13, 14, 20, 20, 19, 18, 13, 10, 9, 12, 15, 15)
my.interaction.plot(score, answer, gender)

13.9 Διάγραμμα αλληλεπίδρασης ενός παράγοντα και επαναλαμβανόμενων μετρήσεων πάνω σε μία συνεχή μεταβλητή (Repeated Measures Interaction Plot)

Παράδειγμα

gender = c(1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0)
gender = factor(gender, levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας"))
weight1 = c(89.8, 107.5, 105.7, 81.2, 99.3, 76.7, 100.7, 75.7, 90.3, 105.7, 81.2, 71.7, 71.2, 98, 116.6, 68.5)
weight2 = c(90.8, 108.5, 106.7, 80.2, 100.3, 75.7, 101.7, 74.8, 91.3, 106.7, 80.2, 70.7, 70.2, 99, 117.6, 67.5)
weight3 = c(91.8, 109.5, 107.7, 79.2, 101.3, 74.7, 102.7, 73.7, 92.3, 107.7, 79.2, 69.7, 69.2, 100, 118.6, 66.5)
dataF = data.frame(gender, weight1, weight2, weight3)

repeated.ANOVA = my_Repeated_ANOVA(dataF, betweencols = c('gender'), withincols = c('weight1', 'weight2', 'weight3'))
data.long = repeated.ANOVA$data.long

# Αλλαγή των labels από weight1, weight2, weight3 σε 1η, 2η, 3η
data.long$time=plyr::revalue(data.long$time, c("weight1"="1η", "weight2"="2η", "weight3"="3η"))

min.all = min(data.long$DependentScore)
lower.y.limit = min.all - 0.05* abs(min.all)
max.all = max(data.long$DependentScore)
upper.y.limit = max.all + 0.05* abs(max.all)
              
# Widen the left margin and allow drawing outside the plot region so that the
# legend fits to the left of the axes. The previous graphical parameters are
# saved and restored below, so later plots are not affected.
old.par = par(mar=c(5.1, 14.1, 4.1, 2.1), xpd=TRUE)
cols =  c("brown1", "cadetblue3")
interaction.plot(data.long$time, data.long$gender, 
                 data.long$DependentScore, type="b", col = cols, 
                 legend=FALSE, lty=c(1,2), lwd=2, pch=c(18,24), 
                 xlab="",  ylab="Βάρος",
                 ylim = c(lower.y.limit, upper.y.limit))
# The built-in legend is switched off above; a custom one is placed in the left margin.
legend("left",c("Γυναίκα", "Άνδρας"), bty="n", lty=c(1,2), lwd=2, pch=c(18,24), col=cols, title="Φύλο",inset=c(-0.55, 0.1))
par(old.par)

Παράδειγμα 2

gender = c(1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0)
gender = factor(gender, levels = c(0, 1), labels = c("Γυναίκα", "Άνδρας"))
weight1 = c(89.8, 107.5, 105.7, 81.2, 99.3, 76.7, 100.7, 75.7, 90.3, 105.7, 81.2, 71.7, 71.2, 98, 116.6, 68.5)
weight2 = c(90.8, 108.5, 106.7, 80.2, 100.3, 75.7, 101.7, 74.8, 91.3, 106.7, 80.2, 70.7, 70.2, 99, 117.6, 67.5)
weight3 = c(91.8, 109.5, 107.7, 79.2, 101.3, 74.7, 102.7, 73.7, 92.3, 107.7, 79.2, 69.7, 69.2, 100, 118.6, 66.5)
dataF = data.frame(gender, weight1, weight2, weight3)

repeated.ANOVA = my_Repeated_ANOVA(dataF, betweencols = c('gender'), withincols = c('weight1', 'weight2', 'weight3'))
data.long = repeated.ANOVA$data.long

# Αλλαγή των labels από weight1, weight2, weight3 σε 1η, 2η, 3η
data.long$time=plyr::revalue(data.long$time, c("weight1"="1η", "weight2"="2η", "weight3"="3η"))

library(dplyr)
library(ggplot2)

data.long %>% 
  group_by(gender, time) %>% 
  summarise(weight = mean(DependentScore)) -> data.long2

#data.long2$gender <- factor(data.long2$gender, levels = rev(levels(data.long2$gender)))
min.all = min(data.long$DependentScore)
lower.y.limit = min.all - 0.05* abs(min.all)
max.all = max(data.long$DependentScore)
upper.y.limit = max.all + 0.05* abs(max.all)

data.long2 %>%
  ggplot() +
  aes(x = time, y = weight, color = gender, label = round(weight, 1)) +
  geom_line(aes(group = gender)) +
  scale_color_brewer(palette = "Set1") +
  geom_point() +  
  geom_text(hjust=0.2, vjust=-1) +
  xlab("Μέτρηση") +
  ylab("Βάρος") + 
  ylim(lower.y.limit, upper.y.limit) +
  labs(color='Φύλο')  +
  guides(color=guide_legend(override.aes=list(fill=NA))) +
  theme(legend.position="left", 
        legend.text=element_text(size=13), 
        legend.title=element_text(size=13), 
        axis.title=element_text(size=13),
        axis.text = element_text(size = 13),
        axis.text.x=element_text(colour="black"),
        axis.text.y=element_text(colour="black"),
        panel.background = element_rect(fill='transparent', color='black'),
        plot.background = element_rect(fill='transparent', color=NA))

13.10 Διάγραμμα διασποράς (Scatterplot) (συνάρτηση my_scatterplot)

Παράδειγμα

valueX = rnorm(1000, 0, 1)
valueY = rbinom(1000, 40, 0.2)
valuecolor = as.factor(c(rep(1, 400), rep(2, 300), rep(3, 300)))
my_scatterplot(x = valueX, y = valueY, color = valuecolor, histograms = TRUE)

Παράδειγμα 2 (χωρίς σημεία)

valueX = rnorm(1000, 0, 1)
valueY = rbinom(1000, 40, 0.2)
valuecolor = as.factor(c(rep(1, 400), rep(2, 300), rep(3, 300)))
data.sample = data.frame(valueX, valueY, valuecolor)

library(ggplot2)
theme_set(theme_bw(16))
# Only the fitted regression line of each group is drawn (no points, no
# confidence band).
ggplot(data.sample, aes(x = valueX, y = valueY, color = valuecolor)) +
    geom_smooth(method = lm, se = FALSE) +
    scale_color_brewer(palette = "Set1") +
    # labs() expects the titles as strings; passing the data vectors themselves
    # would use their 1000 values as the axis/legend titles
    labs(x = "valueX", y = "valueY", colour = "valuecolor") +
    theme(legend.position = "left")

14 Logistic Regression (συνάρτηση my_logistic_regression)

Παράδειγμα

income = c(31, 55, 120, 25, 38, 16, 23, 64, 29, 100, 72, 61, 26, 176, 49, 25, 67, 28, 19, 41)
debtinc = c(17, 6, 3, 10, 4, 2, 5, 10, 16, 9, 8, 6, 2, 9, 9, 20, 31, 17, 24, 16)
default = factor(c(1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1), labels = c('Κανονική αποπληρωμή', 'Αδυναμία αποπληρωμής'))
lr.data.frame = data.frame(income, debtinc, default)
Hmisc::label(lr.data.frame$income) = "Οικογενειακό εισόδημα"
Hmisc::label(lr.data.frame$debtinc) = "Χρέος ως ποσοστό του εισοδήματος"
Hmisc::label(lr.data.frame$default) = "Αδυναμία αποπληρωμής"

my.model = my_logistic_regression(lr.data.frame, default ~ income + debtinc)
my.model$htmlreport
Coding: 0, 1 = Κανονική αποπληρωμή, Αδυναμία αποπληρωμής
If you need reverse order please run the function by setting reverse.levels = TRUE
Logistic regression results for Αδυναμία αποπληρωμής
Model: Αδυναμία αποπληρωμής ~ Οικογενειακό εισόδημα + Χρέος ως ποσοστό του εισοδήματος
  95% C.I.
B SE z p Exp(B)   Lower Upper
Constant 0.69 1.839 0.375 0.708 1.993   0.054 73.239
Οικογενειακό εισόδημα -0.089 0.053 -1.668 0.095 0.915   0.824 1.016
Χρέος ως ποσοστό του εισοδήματος 0.264 0.124 2.127 0.033 1.302   1.021 1.66
Nagelkerke Coefficient of determination R2 = 0.721
Area Under Curve AUC = 0.939
Prediction quality of Αδυναμία αποπληρωμής at threshold 0.5
Observed  
Κανονική αποπληρωμή Αδυναμία αποπληρωμής   Sum
Predicted
  Κανονική αποπληρωμή 10 2   12
  Αδυναμία αποπληρωμής 1 7   8
Sum 11 9   20
Sensitivity (SNS): 0.778
Specificity (SPC): 0.909
You may also consider other thresholds by setting e.g. threshold = c(0.3, 0.7, 0.9)
my.model$ROCplot

round(my.model$ROCplot$data, 3)
##    threshold specificity sensitivity 1-specificity
## 21       Inf       1.000       0.000         0.000
## 20     0.986       1.000       0.111         0.000
## 19     0.962       1.000       0.222         0.000
## 18     0.942       1.000       0.333         0.000
## 17     0.927       1.000       0.444         0.000
## 16     0.914       1.000       0.556         0.000
## 15     0.845       1.000       0.667         0.000
## 14     0.764       1.000       0.778         0.000
## 13     0.620       0.909       0.778         0.091
## 12     0.469       0.818       0.778         0.182
## 11     0.349       0.818       0.889         0.182
## 10     0.232       0.727       0.889         0.273
## 9      0.188       0.636       0.889         0.364
## 8      0.124       0.636       1.000         0.364
## 7      0.076       0.545       1.000         0.455
## 6      0.054       0.455       1.000         0.545
## 5      0.033       0.364       1.000         0.636
## 4      0.015       0.273       1.000         0.727
## 3      0.001       0.182       1.000         0.818
## 2      0.000       0.091       1.000         0.909
## 1       -Inf       0.000       1.000         1.000

15 Principal Components Analysis (PCA) (συνάρτηση my_PCA_analysis)

Παράδειγμα

item1 = c(1, 5, 4, 3, 4, 5, 2, 1, 3, 5)
item2 = c(3, 3, 4, 2, 5, 3, 2, 3, 2, 2)
item3 = c(2, 4, 3, 3, 3, 2, 4, 3, 4, 3)
PCA.data = data.frame(item1, item2, item3)

############################
# 1η Εφαρμογή PCA.analysis #
############################

PCA.results = my_PCA_analysis(PCA.data)

print(PCA.results$corr_matrix)
##            q1         q2          q3
## q1 1.00000000  0.1639973  0.06726728
## q2 0.16399730  1.0000000 -0.28771371
## q3 0.06726728 -0.2877137  1.00000000
print(PCA.results$corplot)

print(PCA.results$nfactors)
## [1] 2
print(PCA.results$communalities)
## $communality
##        q1        q2        q3 
## 0.8890525 0.7115495 0.7622915
print(PCA.results$eigenvalues)
##   Eigenvalue Percent of Variance Cumulative Percent
## 1      1.306               43.5%              43.5%
## 2      1.057               35.2%              78.7%
## 3      0.637               21.2%              99.9%
print(PCA.results$loadings)
## 
## Loadings:
##    PC1    PC2   
## q1  0.289  0.897
## q2  0.836  0.113
## q3 -0.723  0.489
## 
##                  PC1   PC2
## SS loadings    1.306 1.057
## Proportion Var 0.435 0.352
## Cumulative Var 0.435 0.788
print(PCA.results$predicted.values)
##           PC1        PC2
## 1   0.5648414 -1.9248768
## 2  -0.3707910  1.4958717
## 3   0.8823299  0.4341975
## 4  -0.5468449 -0.3215270
## 5   1.5262000  0.5412219
## 6   1.1305797  0.2418257
## 7  -1.4389649 -0.2361796
## 8  -0.1858440 -1.2978538
## 9  -1.2975303  0.3054960
## 10 -0.2639758  0.7618243
PCA.results$htmlTable
Variables
Original Item
q1 item1
q2 item2
q3 item3
Correlation Matrix
q1 q2 q3
q1 1 0.164 0.067
q2 0.164 1 -0.288
q3 0.067 -0.288 1
Number of factors extracted
2
Total Variance Explained
Eigenvalue Percent of Variance Cumulative Percent
1 1.306 43.5% 43.5%
2 1.057 35.2% 78.7%
3 0.637 21.2% 99.9%
Communalities
communality
q1 0.889
q2 0.712
q3 0.762
Component Matrix
PC1 PC2
q1 0.289 0.897
q2 0.836 0.113
q3 -0.723 0.489
############################
# 2η Εφαρμογή PCA.analysis #
############################

PCA.results = my_PCA_analysis(PCA.data, rotate = 'varimax')

PCA.results$htmlTable
Variables
Original Item
q1 item1
q2 item2
q3 item3
Correlation Matrix
q1 q2 q3
q1 1 0.164 0.067
q2 0.164 1 -0.288
q3 0.067 -0.288 1
Number of factors extracted
2
Total Variance Explained
Eigenvalue Percent of Variance Cumulative Percent
1 1.306 43.5% 43.5%
2 1.057 35.2% 78.7%
3 0.637 21.2% 99.9%
Communalities
communality
q1 0.889
q2 0.712
q3 0.762
Component Matrix
PC1 PC2
q1 0.289 0.897
q2 0.836 0.113
q3 -0.723 0.489
Rotated Component Matrix
RC1 RC2
q1 0.001 0.943
q2 -0.761 0.364
q3 0.839 0.243
Component Transformation Matrix
RC1 RC2
1 0.952 0.308
2 -0.308 0.952
############################
# 3η Εφαρμογή PCA.analysis #
############################

PCA.results = my_PCA_analysis(PCA.data, rotate = 'varimax', sort.loadings = TRUE)
PCA.results$htmlTable
Variables
Original Item
q1 item1
q2 item2
q3 item3
Correlation Matrix
q1 q2 q3
q1 1 0.164 0.067
q2 0.164 1 -0.288
q3 0.067 -0.288 1
Number of factors extracted
2
Total Variance Explained
Eigenvalue Percent of Variance Cumulative Percent
1 1.306 43.5% 43.5%
2 1.057 35.2% 78.7%
3 0.637 21.2% 99.9%
Communalities
communality
q1 0.889
q2 0.712
q3 0.762
Component Matrix
PC1 PC2
q1 0.289 0.897
q2 0.836 0.113
q3 -0.723 0.489
Rotated Component Matrix
RC1 RC2
q1 0.001 0.943
q2 -0.761 0.364
q3 0.839 0.243
Component Transformation Matrix
RC1 RC2
1 0.952 0.308
2 -0.308 0.952
############################
# 4η Εφαρμογή PCA.analysis #
############################

# Αν eigeinvalues.limit < 0, τότε βρίσκει το βέλτιστο πλήθος παραγόντων εφαρμόζοντας παράλληλη ανάλυση...
PCA.results = my_PCA_analysis(PCA.data, eigeinvalues.limit = -1)

## Parallel analysis suggests that the number of factors =  0  and the number of components =  0

PCA.results$htmlTable
Variables
Original Item
q1 item1
q2 item2
q3 item3
Correlation Matrix
q1 q2 q3
q1 1 0.164 0.067
q2 0.164 1 -0.288
q3 0.067 -0.288 1
Number of factors extracted
0
Total Variance Explained
Eigenvalue Percent of Variance Cumulative Percent
1 1.306 43.5% 43.5%
2 1.057 35.2% 78.7%
3 0.637 21.2% 99.9%
Communalities
communality
q1 1
q2 1
q3 1
Component Matrix
PC1 PC2 PC3
q1 0.289 0.897 -0.333
q2 0.836 0.113 0.537
q3 -0.723 0.489 0.488

16 Ανάλυση κλίμακας Likert

16.1 Ομαδοποίηση αποκρίσεων με δενδρόγραμμα (συνάρτηση my_dendrogram)

Η απόσταση δύο μεταβλητών μπορεί να υπολογιστεί με πολλούς τρόπους. Η παρακάτω συνάρτηση υλοποιεί τον υπολογισμό των αποστάσεων με την ευκλείδεια απόσταση και με την απόσταση Hamming η οποία είναι κατάλληλη για μεταβλητές με τιμές 0 και 1 και ορίζεται ως το πλήθος των τιμών που δεν είναι ίσες μεταξύ των δύο διανυσμάτων. Η παρακάτω συνάρτηση υπολογίζει τον πίνακα των αποστάσεων για ένα dataframe και δημιουργεί το δενδρόγραμμα των μεταβλητών, επιστρέφοντας και τον αντίστοιχο πίνακα αποστάσεων.

Παράδειγμα 1

Item1 = c(0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1)
Item2 = c(0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1)
Item3 = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1)
my.data.frame = data.frame(Item1, Item2, Item3)
my.d = my_dendrogram(my.data.frame, plot.labels = c("Item 1", "Item 2", "Item 3"), type = 'hamming')

my.d$distMat
Variables
Original Item
q1 Item1
q2 Item2
q3 Item3
Hamming distance
Cols
Item 1 Item 2 Item 3
Rows
  Item 1 0 4 6
  Item 2 4 0 6
  Item 3 6 6 0

Παράδειγμα 2

Item1 = c(0, 5, 4, 3, 1, 2, 2, 1, 1, 0, 0, 1)
Item2 = c(5, 4, 4, 1, 1, 1, 1, 5, 5, 4, 3, 3)
Item3 = c(1, 2, 3, 3, 2, 1, 1, 0, 0, 0, 0, 1)
Item4 = c(1, 2, 3, 3, 2, 1, 1, 0, 0, 0, 0, 1)
my.data.frame = data.frame(Item1, Item2, Item3, Item4)
my.d = my_dendrogram(my.data.frame, plot.labels = c("Item 1", "Item 2", "Item 3", "Item 4"), type = 'euclidean', groups = 2)

my.d$distMat
Variables
Original Item
q1 Item1
q2 Item2
q3 Item3
q4 Item4
Euclidean distance
Cols
Item 1 Item 2 Item 3 Item 4
Rows
  Item 1 0 9.6 4 4
  Item 2 9.6 0 10.2 10.2
  Item 3 4 10.2 0 0
  Item 4 4 10.2 0 0

16.2 Πίνακας συχνοτήτων ή σχετικών συχνοτήτων των αποκρίσεων (συνάρτηση my_likert_scale_description)

Παράδειγμα

Item1 = c(0, 5, 3, 2, 1, 5, 2, 1, 3, 3, 4, 4)
Item2 = c(0, 1, 0, 2, 0, 1, 1, 1, 2, 2, 2, 2)
Item3 = c(3, 5, 4, 4, 2, 2, 4, 5, 3, 3, 4, 3)
my.data.frame = data.frame(Item1, Item2, Item3)

my_likert_scale_description(data = my.data.frame, type = 'freq')
Item Responses 0 1 2 3 4 5 M (SD)
1 Item1 12 1 2 2 3 2 2 2.8 (1.6)
2 Item2 12 3 4 5 0 0 0 1.2 (0.83)
3 Item3 12 0 0 2 4 4 2 3.5 (1.0)
my_likert_scale_description(data = my.data.frame, type = 'percent')
Item Responses 0 1 2 3 4 5 M (SD)
1 Item1 12 8.3% 16.7% 16.7% 25% 16.7% 16.7% 2.8 (1.6)
2 Item2 12 25% 33.3% 41.7% 0% 0% 0% 1.2 (0.83)
3 Item3 12 0% 0% 16.7% 33.3% 33.3% 16.7% 3.5 (1.0)
my_likert_scale_description(data = my.data.frame, type = 'both')
Item Responses 0 1 2 3 4 5 M (SD)
1 Item1 12 1 (8.3%) 2 (16.7%) 2 (16.7%) 3 (25%) 2 (16.7%) 2 (16.7%) 2.8 (1.6)
2 Item2 12 3 (25%) 4 (33.3%) 5 (41.7%) 0 (0%) 0 (0%) 0 (0%) 1.2 (0.83)
3 Item3 12 0 (0%) 0 (0%) 2 (16.7%) 4 (33.3%) 4 (33.3%) 2 (16.7%) 3.5 (1.0)

16.3 Υπολογισμός ομοιότητας αποκρίσεων μεταξύ των items (συνάρτηση response_similarity_among_items)

Παράδειγμα

Item1 = c(0, 5, 3, 2, 1, 5, 2, 1, 3, 3, 4, 4)
Item2 = c(0, 1, 0, 2, 0, 1, 1, 1, 2, 2, 2, 2)
Item3 = c(3, 5, 4, 4, 2, 2, 4, 5, 3, 3, 4, 3)
Item4 = c(1, 1, 2, 3, 3, 2, 1, 1, 2, 2, 2, 2)
my.data.frame = data.frame(Item1, Item2, Item3, Item4)

similarity.report = response_similarity_among_items(c("Item1", "Item2", "Item3", "Item4"), my.data.frame, acceptable.difference = 0)

htmlTable::htmlTable(similarity.report$items.mean)
Item Mean
1 Item1 2.75
2 Item2 1.17
3 Item3 3.5
4 Item4 1.83
htmlTable::htmlTable(similarity.report$agreement.df)
Item1 Item2 Item3 Item4
Item1 100 25 33.3 8.3
Item2 25 100 0 58.3
Item3 33.3 0 100 8.3
Item4 8.3 58.3 8.3 100
htmlTable::htmlTable(similarity.report$agreement.descending)
Item1 Item2 Agreement
8 Item4 Item2 58.3
3 Item3 Item1 33.3
2 Item2 Item1 25
4 Item4 Item1 8.3
12 Item4 Item3 8.3
htmlTable::htmlTable(similarity.report$difference.df)
Item1 Item2 Item3 Item4
Item1 0 1.58 -0.75 0.92
Item2 -1.58 0 -2.33 -0.67
Item3 0.75 2.33 0 1.67
Item4 -0.92 0.67 -1.67 0
htmlTable::htmlTable(similarity.report$difference.descending)
Item1 Item2 Difference
7 Item3 Item2 2.33
3 Item3 Item1 0.75
8 Item4 Item2 0.67
1 Item1 Item1 0
6 Item2 Item2 0
11 Item3 Item3 0
16 Item4 Item4 0
4 Item4 Item1 -0.92
2 Item2 Item1 -1.58
12 Item4 Item3 -1.67
htmlTable::htmlTable(similarity.report$difference.significances)
Item1 Item2 Item3 Item4
Item1 0.003 0.191 0.094
Item2 0.003 0 0.039
Item3 0.191 0 0.003
Item4 0.094 0.039 0.003
htmlTable::htmlTable(similarity.report$spearman.corr.r)
Item1 Item2 Item3 Item4
Item1 1 0.408 0.015 0.066
Item2 0.408 1 0.066 0.272
Item3 0.015 0.066 1 -0.493
Item4 0.066 0.272 -0.493 1
htmlTable::htmlTable(similarity.report$spearman.corr.n)
Item1 Item2 Item3 Item4
Item1 12 12 12 12
Item2 12 12 12 12
Item3 12 12 12 12
Item4 12 12 12 12
htmlTable::htmlTable(similarity.report$spearman.corr.sig)
Item1 Item2 Item3 Item4
Item1 0.188 0.964 0.839
Item2 0.188 0.839 0.392
Item3 0.964 0.839 0.104
Item4 0.839 0.392 0.104
htmlTable::htmlTable(similarity.report$spearman.corr.descenting)
Item1 Item2 Spearman
1 Item1 Item1 1
6 Item2 Item2 1
11 Item3 Item3 1
16 Item4 Item4 1
2 Item2 Item1 0.408
8 Item4 Item2 0.272
7 Item3 Item2 0.066
4 Item4 Item1 0.066
3 Item3 Item1 0.015
12 Item4 Item3 -0.493
htmlTable::htmlTable(similarity.report$pearson.corr.r)
Item1 Item2 Item3 Item4
Item1 1 0.442 0.028 0.04
Item2 0.442 1 0.109 0.202
Item3 0.028 0.109 1 -0.507
Item4 0.04 0.202 -0.507 1
htmlTable::htmlTable(similarity.report$pearson.corr.n)
Item1 Item2 Item3 Item4
Item1 12 12 12 12
Item2 12 12 12 12
Item3 12 12 12 12
Item4 12 12 12 12
htmlTable::htmlTable(similarity.report$pearson.corr.sig)
Item1 Item2 Item3 Item4
Item1 0.151 0.93 0.903
Item2 0.151 0.736 0.528
Item3 0.93 0.736 0.093
Item4 0.903 0.528 0.093
htmlTable::htmlTable(similarity.report$pearson.corr.descenting)
Item1 Item2 Pearson
1 Item1 Item1 1
6 Item2 Item2 1
11 Item3 Item3 1
16 Item4 Item4 1
2 Item2 Item1 0.442
8 Item4 Item2 0.202
7 Item3 Item2 0.109
4 Item4 Item1 0.04
3 Item3 Item1 0.028
12 Item4 Item3 -0.507

16.4 Γλωσσική ομοιότητα μεταξύ των ερωτήσεων (cosine και όλες οι μέθοδοι που υποστηρίζει η βιβλιοθήκη stringdist) (συναρτήσεις word_similarity_among_items_stringdist και word_similarity_among_items)

# Computes only the cosine similarity (on TF-IDF weights) between the sentences.
#
# sentences: character vector with one sentence (item) per element. Sentences
#            are lower-cased and split on whitespace; punctuation is kept as
#            part of the words.
#
# Returns a data frame with one row and one column per sentence, named
# Item1, Item2, ... Cell [i, j] is the cosine similarity, rounded to three
# decimals, between sentence i (used as the query) and sentence j.
# An empty input returns an empty data frame.
#
# NOTE(review): the query sentence is appended to the corpus as an extra
# document ("Check") before the IDF weights are computed, so the weights depend
# on the query and the result is not necessarily symmetric. Confirm this is
# intended.
word_similarity_among_items=function(sentences){
  # https://stackoverflow.com/questions/57092479/finding-the-cosine-similarity-of-a-sentence-with-many-others-in-r

  n.items = length(sentences)
  if (n.items == 0) {
    return(data.frame())
  }

  # seq_along() instead of 1:length(), which counts backwards on empty input
  item.names = paste0("Item", seq_along(sentences))
  names(sentences) = item.names

  # Similarity of one query sentence against every sentence of the corpus
  similarity.row = function(asentence){
    sv = c(sentences, Check = asentence)
    svs = strsplit(tolower(sv), "\\s+")
    # term x document frequency matrix; the last column is the query ("Check")
    termf = unclass(table(stack(svs)))
    idf = log(1/rowMeans(termf != 0))
    tfidf = termf*idf
    query = tfidf[, n.items + 1]
    # drop = FALSE keeps a matrix even when there is a single sentence
    corpus = tfidf[, seq_len(n.items), drop = FALSE]
    dp = as.vector(t(query) %*% corpus)
    round(dp/(sqrt(colSums(corpus^2))*sqrt(sum(query^2))), 3)
  }

  # Build all rows first and assemble the result once, instead of growing a
  # data frame inside a loop
  all.rows = lapply(unname(sentences), similarity.row)
  similarity.matrix = matrix(as.numeric(unlist(all.rows)),
                             nrow = n.items, ncol = n.items, byrow = TRUE,
                             dimnames = list(item.names, item.names))

  return(as.data.frame(similarity.matrix))
}

Παράδειγμα

GOHAI.items.en = c("How often did you limit the kinds or amounts of food you eat because of problems with your teeth or dentures?", 
             "How often do you have trouble biting or chewing any kinds of food, such as tough meat or apples?", 
             "How often were you able to swallow comfortably?", 
             "How often have your teeth or dentures prevented you from speaking the way you wanted?", 
             "How often were you able to eat anything without feeling discomfort?", 
             "How often did you limit contacts with people because of the condition of your teeth or dentures?", 
             "How often were you pleased or happy with the looks of your teeth and gums, or dentures?", 
             "How often did you use medication to relieve pain or discomfort from around your mouth?", 
             "How often were you worried or concerned about the problems of your teeth, gums or dentures?", 
             "How often did you feel nervous or self-conscious because of problems with your teeth, gums, or dentures?", 
             "How often did you feel uncomfortable eating in front of people because of problems with your teeth or dentures?", 
             "How often were your teeth or gums sensitive to hot, cold, or sweets?")

word_similarity_among_items_stringdist(GOHAI.items.en)
##        Item1 Item2 Item3 Item4 Item5 Item6 Item7 Item8 Item9 Item10 Item11
## Item1  0.000 0.033 0.127 0.061 0.051 0.024 0.031 0.033 0.048  0.040  0.029
## Item2  0.033 0.000 0.132 0.088 0.064 0.054 0.047 0.060 0.090  0.070  0.058
## Item3  0.127 0.132 0.000 0.166 0.108 0.149 0.127 0.161 0.134  0.146  0.111
## Item4  0.061 0.088 0.166 0.000 0.065 0.069 0.028 0.110 0.048  0.080  0.048
## Item5  0.051 0.064 0.108 0.065 0.000 0.033 0.071 0.089 0.088  0.097  0.041
## Item6  0.024 0.054 0.149 0.069 0.033 0.000 0.055 0.055 0.068  0.063  0.032
## Item7  0.031 0.047 0.127 0.028 0.071 0.055 0.000 0.085 0.037  0.047  0.037
## Item8  0.033 0.060 0.161 0.110 0.089 0.055 0.085 0.000 0.073  0.077  0.064
## Item9  0.048 0.090 0.134 0.048 0.088 0.068 0.037 0.073 0.000  0.036  0.034
## Item10 0.040 0.070 0.146 0.080 0.097 0.063 0.047 0.077 0.036  0.000  0.037
## Item11 0.029 0.058 0.111 0.048 0.041 0.032 0.037 0.064 0.034  0.037  0.000
## Item12 0.052 0.092 0.149 0.068 0.073 0.056 0.043 0.111 0.045  0.045  0.061
##        Item12
## Item1   0.052
## Item2   0.092
## Item3   0.149
## Item4   0.068
## Item5   0.073
## Item6   0.056
## Item7   0.043
## Item8   0.111
## Item9   0.045
## Item10  0.045
## Item11  0.061
## Item12  0.000

16.5 Γλωσσική ομοιότητα μεταξύ των ερωτήσεων (πλήθος κοινών λέξεων + δείκτης ομοιότητας) (συνάρτηση common_words_similarity_among_items)

Παράδειγμα

# English wording of the 12 questionnaire items, one string per item, listed
# in item order (Item1 ... Item12 in the printed result).
GOHAI.items.en = c(
  "How often did you limit the kinds or amounts of food you eat because of problems with your teeth or dentures?",
  "How often do you have trouble biting or chewing any kinds of food, such as tough meat or apples?",
  "How often were you able to swallow comfortably?",
  "How often have your teeth or dentures prevented you from speaking the way you wanted?",
  "How often were you able to eat anything without feeling discomfort?",
  "How often did you limit contacts with people because of the condition of your teeth or dentures?",
  "How often were you pleased or happy with the looks of your teeth and gums, or dentures?",
  "How often did you use medication to relieve pain or discomfort from around your mouth?",
  "How often were you worried or concerned about the problems of your teeth, gums or dentures?",
  "How often did you feel nervous or self-conscious because of problems with your teeth, gums, or dentures?",
  "How often did you feel uncomfortable eating in front of people because of problems with your teeth or dentures?",
  "How often were your teeth or gums sensitive to hot, cold, or sweets?"
)

# Compare every pair of items by the words they have in common. The printed
# result is a list that reports, in a wide item-by-item table and in a long
# one-row-per-pair table: the number of shared words, the shared words
# themselves, and a similarity index.
# NOTE(review): the printed index values match
# 2 * shared / (words in item A + words in item B), e.g. Item1 vs Item2:
# 2 * 8 / (21 + 19) = 0.400 - confirm against the definition in MyRFunctions.R.
common_words_similarity_among_items(GOHAI.items.en)
## $df.to.return.common.words.count
##    Item1 Item2 Item3 Item4 Item5 Item6 Item7 Item8 Item9 Item10 Item11 Item12
## 1     21     8     3     9     4    14    11     6    11     13     13      6
## 2      8    19     3     5     3     5     6     4     6      6      5      4
## 3      3     3     8     3     6     3     4     4     4      3      3      4
## 4      9     5     3    15     3     8     8     6     8      7      7      5
## 5      4     3     6     3    11     3     4     5     4      3      3      4
## 6     14     5     3     8     3    17    10     6     9     11     13      5
## 7     11     6     4     8     4    10    17     5    12     11      9      8
## 8      6     4     4     6     5     6     5    15     5      6      6      5
## 9     11     6     4     8     4     9    12     5    16     11      9      8
## 10    13     6     3     7     3    11    11     6    11     17     13      7
## 11    13     5     3     7     3    13     9     6     9     13     19      5
## 12     6     4     4     5     4     5     8     5     8      7      5     13
## 
## $common.words.count.long
##       Var1   Var2 Freq
## 2    Item2  Item1    8
## 3    Item3  Item1    3
## 4    Item4  Item1    9
## 5    Item5  Item1    4
## 6    Item6  Item1   14
## 7    Item7  Item1   11
## 8    Item8  Item1    6
## 9    Item9  Item1   11
## 10  Item10  Item1   13
## 11  Item11  Item1   13
## 12  Item12  Item1    6
## 15   Item3  Item2    3
## 16   Item4  Item2    5
## 17   Item5  Item2    3
## 18   Item6  Item2    5
## 19   Item7  Item2    6
## 20   Item8  Item2    4
## 21   Item9  Item2    6
## 22  Item10  Item2    6
## 23  Item11  Item2    5
## 24  Item12  Item2    4
## 28   Item4  Item3    3
## 29   Item5  Item3    6
## 30   Item6  Item3    3
## 31   Item7  Item3    4
## 32   Item8  Item3    4
## 33   Item9  Item3    4
## 34  Item10  Item3    3
## 35  Item11  Item3    3
## 36  Item12  Item3    4
## 41   Item5  Item4    3
## 42   Item6  Item4    8
## 43   Item7  Item4    8
## 44   Item8  Item4    6
## 45   Item9  Item4    8
## 46  Item10  Item4    7
## 47  Item11  Item4    7
## 48  Item12  Item4    5
## 54   Item6  Item5    3
## 55   Item7  Item5    4
## 56   Item8  Item5    5
## 57   Item9  Item5    4
## 58  Item10  Item5    3
## 59  Item11  Item5    3
## 60  Item12  Item5    4
## 67   Item7  Item6   10
## 68   Item8  Item6    6
## 69   Item9  Item6    9
## 70  Item10  Item6   11
## 71  Item11  Item6   13
## 72  Item12  Item6    5
## 80   Item8  Item7    5
## 81   Item9  Item7   12
## 82  Item10  Item7   11
## 83  Item11  Item7    9
## 84  Item12  Item7    8
## 93   Item9  Item8    5
## 94  Item10  Item8    6
## 95  Item11  Item8    6
## 96  Item12  Item8    5
## 106 Item10  Item9   11
## 107 Item11  Item9    9
## 108 Item12  Item9    8
## 119 Item11 Item10   13
## 120 Item12 Item10    7
## 132 Item12 Item11    5
## 
## $df.to.return.common.words.string
##                                                                                                                                                      Item1
## 1     amounts   because   dentures   did   eat   food   How   kinds   limit   of   of   often   or   or   problems   teeth   the   with   you   you   your
## 2                                                                                                          food   How   kinds   of   often   or   or   you
## 3                                                                                                                                        How   often   you
## 4                                                                                             dentures   How   often   or   teeth   the   you   you   your
## 5                                                                                                                                  eat   How   often   you
## 6                                                          because   dentures   did   How   limit   of   of   often   or   teeth   the   with   you   your
## 7                                                                                  dentures   How   of   often   or   or   teeth   the   with   you   your
## 8                                                                                                                      did   How   often   or   you   your
## 9                                                                              dentures   How   of   often   or   or   problems   teeth   the   you   your
## 10                                                            because   dentures   did   How   of   often   or   or   problems   teeth   with   you   your
## 11                                                            because   dentures   did   How   of   of   often   or   problems   teeth   with   you   your
## 12                                                                                                                    How   often   or   or   teeth   your
##                                                                                                                                    Item2
## 1                                                                                        food   How   kinds   of   often   or   or   you
## 2     any   apples   as   biting   chewing   do   food   have   How   kinds   meat   of   often   or   or   such   tough   trouble   you
## 3                                                                                                                      How   often   you
## 4                                                                                                          have   How   often   or   you
## 5                                                                                                                      How   often   you
## 6                                                                                                            How   of   often   or   you
## 7                                                                                                       How   of   often   or   or   you
## 8                                                                                                                 How   often   or   you
## 9                                                                                                       How   of   often   or   or   you
## 10                                                                                                      How   of   often   or   or   you
## 11                                                                                                           How   of   often   or   you
## 12                                                                                                                 How   often   or   or
##                                                              Item3
## 1                                                How   often   you
## 2                                                How   often   you
## 3     able   comfortably   How   often   swallow   to   were   you
## 4                                                How   often   you
## 5                             able   How   often   to   were   you
## 6                                                How   often   you
## 7                                         How   often   were   you
## 8                                           How   often   to   you
## 9                                         How   often   were   you
## 10                                               How   often   you
## 11                                               How   often   you
## 12                                         How   often   to   were
##                                                                                                                  Item4
## 1                                                         dentures   How   often   or   teeth   the   you   you   your
## 2                                                                                        have   How   often   or   you
## 3                                                                                                    How   often   you
## 4     dentures   from   have   How   often   or   prevented   speaking   teeth   the   wanted   way   you   you   your
## 5                                                                                                    How   often   you
## 6                                                               dentures   How   often   or   teeth   the   you   your
## 7                                                               dentures   How   often   or   teeth   the   you   your
## 8                                                                                 from   How   often   or   you   your
## 9                                                               dentures   How   often   or   teeth   the   you   your
## 10                                                                    dentures   How   often   or   teeth   you   your
## 11                                                                    dentures   How   often   or   teeth   you   your
## 12                                                                                     How   often   or   teeth   your
##                                                                                        Item5
## 1                                                                    eat   How   often   you
## 2                                                                          How   often   you
## 3                                                       able   How   often   to   were   you
## 4                                                                          How   often   you
## 5     able   anything   discomfort   eat   feeling   How   often   to   were   without   you
## 6                                                                          How   often   you
## 7                                                                   How   often   were   you
## 8                                                        discomfort   How   often   to   you
## 9                                                                   How   often   were   you
## 10                                                                         How   often   you
## 11                                                                         How   often   you
## 12                                                                   How   often   to   were
##                                                                                                                                 Item6
## 1                                     because   dentures   did   How   limit   of   of   often   or   teeth   the   with   you   your
## 2                                                                                                         How   of   often   or   you
## 3                                                                                                                   How   often   you
## 4                                                                              dentures   How   often   or   teeth   the   you   your
## 5                                                                                                                   How   often   you
## 6     because   condition   contacts   dentures   did   How   limit   of   of   often   or   people   teeth   the   with   you   your
## 7                                                                  dentures   How   of   often   or   teeth   the   with   you   your
## 8                                                                                                 did   How   often   or   you   your
## 9                                                                         dentures   How   of   often   or   teeth   the   you   your
## 10                                                       because   dentures   did   How   of   often   or   teeth   with   you   your
## 11                                         because   dentures   did   How   of   of   often   or   people   teeth   with   you   your
## 12                                                                                                    How   often   or   teeth   your
##                                                                                                                       Item7
## 1                                                   dentures   How   of   often   or   or   teeth   the   with   you   your
## 2                                                                                          How   of   often   or   or   you
## 3                                                                                                  How   often   were   you
## 4                                                                    dentures   How   often   or   teeth   the   you   your
## 5                                                                                                  How   often   were   you
## 6                                                        dentures   How   of   often   or   teeth   the   with   you   your
## 7     and   dentures   gums   happy   How   looks   of   often   or   or   pleased   teeth   the   were   with   you   your
## 8                                                                                             How   often   or   you   your
## 9                                            dentures   gums   How   of   often   or   or   teeth   the   were   you   your
## 10                                                 dentures   gums   How   of   often   or   or   teeth   with   you   your
## 11                                                             dentures   How   of   often   or   teeth   with   you   your
## 12                                                                       gums   How   often   or   or   teeth   were   your
##                                                                                                                   Item8
## 1                                                                                   did   How   often   or   you   your
## 2                                                                                                How   often   or   you
## 3                                                                                                How   often   to   you
## 4                                                                                  from   How   often   or   you   your
## 5                                                                                   discomfort   How   often   to   you
## 6                                                                                   did   How   often   or   you   your
## 7                                                                                         How   often   or   you   your
## 8     around   did   discomfort   from   How   medication   mouth   often   or   pain   relieve   to   use   you   your
## 9                                                                                         How   often   or   you   your
## 10                                                                                  did   How   often   or   you   your
## 11                                                                                  did   How   often   or   you   your
## 12                                                                                         How   often   or   to   your
##                                                                                                                         Item9
## 1                                                 dentures   How   of   often   or   or   problems   teeth   the   you   your
## 2                                                                                            How   of   often   or   or   you
## 3                                                                                                    How   often   were   you
## 4                                                                      dentures   How   often   or   teeth   the   you   your
## 5                                                                                                    How   often   were   you
## 6                                                                 dentures   How   of   often   or   teeth   the   you   your
## 7                                              dentures   gums   How   of   often   or   or   teeth   the   were   you   your
## 8                                                                                               How   often   or   you   your
## 9     about   concerned   dentures   gums   How   of   often   or   or   problems   teeth   the   were   worried   you   your
## 10                                               dentures   gums   How   of   often   or   or   problems   teeth   you   your
## 11                                                           dentures   How   of   often   or   problems   teeth   you   your
## 12                                                                         gums   How   often   or   or   teeth   were   your
##                                                                                                                                      Item10
## 1                                              because   dentures   did   How   of   often   or   or   problems   teeth   with   you   your
## 2                                                                                                          How   of   often   or   or   you
## 3                                                                                                                         How   often   you
## 4                                                                                          dentures   How   often   or   teeth   you   your
## 5                                                                                                                         How   often   you
## 6                                                              because   dentures   did   How   of   often   or   teeth   with   you   your
## 7                                                                  dentures   gums   How   of   often   or   or   teeth   with   you   your
## 8                                                                                                       did   How   often   or   you   your
## 9                                                              dentures   gums   How   of   often   or   or   problems   teeth   you   your
## 10    because   dentures   did   feel   gums   How   nervous   of   often   or   or   problems   self-conscious   teeth   with   you   your
## 11                                           because   dentures   did   feel   How   of   often   or   problems   teeth   with   you   your
## 12                                                                                              gums   How   often   or   or   teeth   your
##                                                                                                                                                   Item11
## 1                                                           because   dentures   did   How   of   of   often   or   problems   teeth   with   you   your
## 2                                                                                                                            How   of   often   or   you
## 3                                                                                                                                      How   often   you
## 4                                                                                                       dentures   How   often   or   teeth   you   your
## 5                                                                                                                                      How   often   you
## 6                                                             because   dentures   did   How   of   of   often   or   people   teeth   with   you   your
## 7                                                                                           dentures   How   of   often   or   teeth   with   you   your
## 8                                                                                                                    did   How   often   or   you   your
## 9                                                                                       dentures   How   of   often   or   problems   teeth   you   your
## 10                                                        because   dentures   did   feel   How   of   often   or   problems   teeth   with   you   your
## 11    because   dentures   did   eating   feel   front   How   in   of   of   often   or   people   problems   teeth   uncomfortable   with   you   your
## 12                                                                                                                       How   often   or   teeth   your
##                                                                                          Item12
## 1                                                          How   often   or   or   teeth   your
## 2                                                                         How   often   or   or
## 3                                                                       How   often   to   were
## 4                                                               How   often   or   teeth   your
## 5                                                                       How   often   to   were
## 6                                                               How   often   or   teeth   your
## 7                                            gums   How   often   or   or   teeth   were   your
## 8                                                                  How   often   or   to   your
## 9                                            gums   How   often   or   or   teeth   were   your
## 10                                                  gums   How   often   or   or   teeth   your
## 11                                                              How   often   or   teeth   your
## 12    cold   gums   hot   How   often   or   or   sensitive   sweets   teeth   to   were   your
## 
## $common.words.string.long
##       Var1   Var2
## 2    Item2  Item1
## 3    Item3  Item1
## 4    Item4  Item1
## 5    Item5  Item1
## 6    Item6  Item1
## 7    Item7  Item1
## 8    Item8  Item1
## 9    Item9  Item1
## 10  Item10  Item1
## 11  Item11  Item1
## 12  Item12  Item1
## 15   Item3  Item2
## 16   Item4  Item2
## 17   Item5  Item2
## 18   Item6  Item2
## 19   Item7  Item2
## 20   Item8  Item2
## 21   Item9  Item2
## 22  Item10  Item2
## 23  Item11  Item2
## 24  Item12  Item2
## 28   Item4  Item3
## 29   Item5  Item3
## 30   Item6  Item3
## 31   Item7  Item3
## 32   Item8  Item3
## 33   Item9  Item3
## 34  Item10  Item3
## 35  Item11  Item3
## 36  Item12  Item3
## 41   Item5  Item4
## 42   Item6  Item4
## 43   Item7  Item4
## 44   Item8  Item4
## 45   Item9  Item4
## 46  Item10  Item4
## 47  Item11  Item4
## 48  Item12  Item4
## 54   Item6  Item5
## 55   Item7  Item5
## 56   Item8  Item5
## 57   Item9  Item5
## 58  Item10  Item5
## 59  Item11  Item5
## 60  Item12  Item5
## 67   Item7  Item6
## 68   Item8  Item6
## 69   Item9  Item6
## 70  Item10  Item6
## 71  Item11  Item6
## 72  Item12  Item6
## 80   Item8  Item7
## 81   Item9  Item7
## 82  Item10  Item7
## 83  Item11  Item7
## 84  Item12  Item7
## 93   Item9  Item8
## 94  Item10  Item8
## 95  Item11  Item8
## 96  Item12  Item8
## 106 Item10  Item9
## 107 Item11  Item9
## 108 Item12  Item9
## 119 Item11 Item10
## 120 Item12 Item10
## 132 Item12 Item11
##                                                                                                   Freq
## 2                                                      food   How   kinds   of   often   or   or   you
## 3                                                                                    How   often   you
## 4                                         dentures   How   often   or   teeth   the   you   you   your
## 5                                                                              eat   How   often   you
## 6      because   dentures   did   How   limit   of   of   often   or   teeth   the   with   you   your
## 7                              dentures   How   of   often   or   or   teeth   the   with   you   your
## 8                                                                  did   How   often   or   you   your
## 9                          dentures   How   of   often   or   or   problems   teeth   the   you   your
## 10        because   dentures   did   How   of   often   or   or   problems   teeth   with   you   your
## 11        because   dentures   did   How   of   of   often   or   problems   teeth   with   you   your
## 12                                                                How   often   or   or   teeth   your
## 15                                                                                   How   often   you
## 16                                                                       have   How   often   or   you
## 17                                                                                   How   often   you
## 18                                                                         How   of   often   or   you
## 19                                                                    How   of   often   or   or   you
## 20                                                                              How   often   or   you
## 21                                                                    How   of   often   or   or   you
## 22                                                                    How   of   often   or   or   you
## 23                                                                         How   of   often   or   you
## 24                                                                               How   often   or   or
## 28                                                                                   How   often   you
## 29                                                                able   How   often   to   were   you
## 30                                                                                   How   often   you
## 31                                                                            How   often   were   you
## 32                                                                              How   often   to   you
## 33                                                                            How   often   were   you
## 34                                                                                   How   often   you
## 35                                                                                   How   often   you
## 36                                                                             How   often   to   were
## 41                                                                                   How   often   you
## 42                                              dentures   How   often   or   teeth   the   you   your
## 43                                              dentures   How   often   or   teeth   the   you   your
## 44                                                                from   How   often   or   you   your
## 45                                              dentures   How   often   or   teeth   the   you   your
## 46                                                    dentures   How   often   or   teeth   you   your
## 47                                                    dentures   How   often   or   teeth   you   your
## 48                                                                     How   often   or   teeth   your
## 54                                                                                   How   often   you
## 55                                                                            How   often   were   you
## 56                                                                 discomfort   How   often   to   you
## 57                                                                            How   often   were   you
## 58                                                                                   How   often   you
## 59                                                                                   How   often   you
## 60                                                                             How   often   to   were
## 67                                  dentures   How   of   often   or   teeth   the   with   you   your
## 68                                                                 did   How   often   or   you   your
## 69                                         dentures   How   of   often   or   teeth   the   you   your
## 70                        because   dentures   did   How   of   often   or   teeth   with   you   your
## 71          because   dentures   did   How   of   of   often   or   people   teeth   with   you   your
## 72                                                                     How   often   or   teeth   your
## 80                                                                       How   often   or   you   your
## 81                      dentures   gums   How   of   often   or   or   teeth   the   were   you   your
## 82                            dentures   gums   How   of   often   or   or   teeth   with   you   your
## 83                                        dentures   How   of   often   or   teeth   with   you   your
## 84                                                  gums   How   often   or   or   teeth   were   your
## 93                                                                       How   often   or   you   your
## 94                                                                 did   How   often   or   you   your
## 95                                                                 did   How   often   or   you   your
## 96                                                                        How   often   or   to   your
## 106                       dentures   gums   How   of   often   or   or   problems   teeth   you   your
## 107                                   dentures   How   of   often   or   problems   teeth   you   your
## 108                                                 gums   How   often   or   or   teeth   were   your
## 119     because   dentures   did   feel   How   of   often   or   problems   teeth   with   you   your
## 120                                                        gums   How   often   or   or   teeth   your
## 132                                                                    How   often   or   teeth   your
## 
## $df.to.return.common.words.index
##    Item1 Item2 Item3 Item4 Item5 Item6 Item7 Item8 Item9 Item10 Item11 Item12
## 1  1.000 0.400 0.207 0.500 0.250 0.737 0.579 0.333 0.595  0.684  0.650  0.353
## 2  0.400 1.000 0.222 0.294 0.200 0.278 0.333 0.235 0.343  0.333  0.263  0.250
## 3  0.207 0.222 1.000 0.261 0.632 0.240 0.320 0.348 0.333  0.240  0.222  0.381
## 4  0.500 0.294 0.261 1.000 0.231 0.500 0.500 0.400 0.516  0.438  0.412  0.357
## 5  0.250 0.200 0.632 0.231 1.000 0.214 0.286 0.385 0.296  0.214  0.200  0.333
## 6  0.737 0.278 0.240 0.500 0.214 1.000 0.588 0.375 0.545  0.647  0.722  0.333
## 7  0.579 0.333 0.320 0.500 0.286 0.588 1.000 0.312 0.727  0.647  0.500  0.533
## 8  0.333 0.235 0.348 0.400 0.385 0.375 0.312 1.000 0.323  0.375  0.353  0.357
## 9  0.595 0.343 0.333 0.516 0.296 0.545 0.727 0.323 1.000  0.667  0.514  0.552
## 10 0.684 0.333 0.240 0.438 0.214 0.647 0.647 0.375 0.667  1.000  0.722  0.467
## 11 0.650 0.263 0.222 0.412 0.200 0.722 0.500 0.353 0.514  0.722  1.000  0.312
## 12 0.353 0.250 0.381 0.357 0.333 0.333 0.533 0.357 0.552  0.467  0.312  1.000
## 
## $common.words.index.long
##       Var1   Var2  Freq
## 2    Item2  Item1 0.400
## 3    Item3  Item1 0.207
## 4    Item4  Item1 0.500
## 5    Item5  Item1 0.250
## 6    Item6  Item1 0.737
## 7    Item7  Item1 0.579
## 8    Item8  Item1 0.333
## 9    Item9  Item1 0.595
## 10  Item10  Item1 0.684
## 11  Item11  Item1 0.650
## 12  Item12  Item1 0.353
## 15   Item3  Item2 0.222
## 16   Item4  Item2 0.294
## 17   Item5  Item2 0.200
## 18   Item6  Item2 0.278
## 19   Item7  Item2 0.333
## 20   Item8  Item2 0.235
## 21   Item9  Item2 0.343
## 22  Item10  Item2 0.333
## 23  Item11  Item2 0.263
## 24  Item12  Item2 0.250
## 28   Item4  Item3 0.261
## 29   Item5  Item3 0.632
## 30   Item6  Item3 0.240
## 31   Item7  Item3 0.320
## 32   Item8  Item3 0.348
## 33   Item9  Item3 0.333
## 34  Item10  Item3 0.240
## 35  Item11  Item3 0.222
## 36  Item12  Item3 0.381
## 41   Item5  Item4 0.231
## 42   Item6  Item4 0.500
## 43   Item7  Item4 0.500
## 44   Item8  Item4 0.400
## 45   Item9  Item4 0.516
## 46  Item10  Item4 0.438
## 47  Item11  Item4 0.412
## 48  Item12  Item4 0.357
## 54   Item6  Item5 0.214
## 55   Item7  Item5 0.286
## 56   Item8  Item5 0.385
## 57   Item9  Item5 0.296
## 58  Item10  Item5 0.214
## 59  Item11  Item5 0.200
## 60  Item12  Item5 0.333
## 67   Item7  Item6 0.588
## 68   Item8  Item6 0.375
## 69   Item9  Item6 0.545
## 70  Item10  Item6 0.647
## 71  Item11  Item6 0.722
## 72  Item12  Item6 0.333
## 80   Item8  Item7 0.312
## 81   Item9  Item7 0.727
## 82  Item10  Item7 0.647
## 83  Item11  Item7 0.500
## 84  Item12  Item7 0.533
## 93   Item9  Item8 0.323
## 94  Item10  Item8 0.375
## 95  Item11  Item8 0.353
## 96  Item12  Item8 0.357
## 106 Item10  Item9 0.667
## 107 Item11  Item9 0.514
## 108 Item12  Item9 0.552
## 119 Item11 Item10 0.722
## 120 Item12 Item10 0.467
## 132 Item12 Item11 0.312
## 
## $df.to.return.common.words.count.no.conjunctions
##    Item1 Item2 Item3 Item4 Item5 Item6 Item7 Item8 Item9 Item10 Item11 Item12
## 1     19     6     3     8     4    13     9     5     9     11     12      4
## 2      6    17     3     4     3     4     4     3     4      4      4      2
## 3      3     3     8     3     6     3     4     4     4      3      3      4
## 4      8     4     3    14     3     7     7     5     7      6      6      4
## 5      4     3     6     3    11     3     4     5     4      3      3      4
## 6     13     4     3     7     3    16     9     5     8     10     12      4
## 7      9     4     4     7     4     9    14     4    10      9      8      6
## 8      5     3     4     5     5     5     4    14     4      5      5      4
## 9      9     4     4     7     4     8    10     4    14      9      8      6
## 10    11     4     3     6     3    10     9     5     9     15     12      5
## 11    12     4     3     6     3    12     8     5     8     12     18      4
## 12     4     2     4     4     4     4     6     4     6      5      4     11
## 
## $common.words.count.no.conjunctions.long
##       Var1   Var2 Freq
## 2    Item2  Item1    6
## 3    Item3  Item1    3
## 4    Item4  Item1    8
## 5    Item5  Item1    4
## 6    Item6  Item1   13
## 7    Item7  Item1    9
## 8    Item8  Item1    5
## 9    Item9  Item1    9
## 10  Item10  Item1   11
## 11  Item11  Item1   12
## 12  Item12  Item1    4
## 15   Item3  Item2    3
## 16   Item4  Item2    4
## 17   Item5  Item2    3
## 18   Item6  Item2    4
## 19   Item7  Item2    4
## 20   Item8  Item2    3
## 21   Item9  Item2    4
## 22  Item10  Item2    4
## 23  Item11  Item2    4
## 24  Item12  Item2    2
## 28   Item4  Item3    3
## 29   Item5  Item3    6
## 30   Item6  Item3    3
## 31   Item7  Item3    4
## 32   Item8  Item3    4
## 33   Item9  Item3    4
## 34  Item10  Item3    3
## 35  Item11  Item3    3
## 36  Item12  Item3    4
## 41   Item5  Item4    3
## 42   Item6  Item4    7
## 43   Item7  Item4    7
## 44   Item8  Item4    5
## 45   Item9  Item4    7
## 46  Item10  Item4    6
## 47  Item11  Item4    6
## 48  Item12  Item4    4
## 54   Item6  Item5    3
## 55   Item7  Item5    4
## 56   Item8  Item5    5
## 57   Item9  Item5    4
## 58  Item10  Item5    3
## 59  Item11  Item5    3
## 60  Item12  Item5    4
## 67   Item7  Item6    9
## 68   Item8  Item6    5
## 69   Item9  Item6    8
## 70  Item10  Item6   10
## 71  Item11  Item6   12
## 72  Item12  Item6    4
## 80   Item8  Item7    4
## 81   Item9  Item7   10
## 82  Item10  Item7    9
## 83  Item11  Item7    8
## 84  Item12  Item7    6
## 93   Item9  Item8    4
## 94  Item10  Item8    5
## 95  Item11  Item8    5
## 96  Item12  Item8    4
## 106 Item10  Item9    9
## 107 Item11  Item9    8
## 108 Item12  Item9    6
## 119 Item11 Item10   12
## 120 Item12 Item10    5
## 132 Item12 Item11    4

16.5.1 Μέσο πλήθος κοινών λέξεων και μέση τιμή του δείκτη ομοιότητας μεταξύ όλων των διαφορετικών ζευγών (συνάρτηση mean_common_words)

Παράδειγμα

mean_common_words(GOHAI.items.en, type = 'words')
## [1] 6.5
mean_common_words(GOHAI.items.en, type = 'index')
## [1] 0.407

16.6 Συσχέτιση μεταξύ των αποκρίσεων

my_cor_table(my.data.frame, c('Item1', 'Item2', 'Item3', 'Item4'))
Pearson Correlation Coefficients (*:p<.05, **:p<.01, ***:p<.001)
Item1 Item2 Item3 Item4
Item1 - .442 .028 .040
Item2 .442 - .109 .202
Item3 .028 .109 - -.507
Item4 .040 .202 -.507 -

16.7 Εσωτερική αξιοπιστία της κλίμακας (συνάρτηση my_reliability)

my_reliability(factorname = 'MyFactor', variables = c('Item1', 'Item2', 'Item3', 'Item4'), data = my.data.frame)
## 
## ΔΕΙΚΤΕΣ ΑΞΙΟΠΙΣΤΙΑΣ
## 
##        MyFactor
## alpha     0.242
## omega   335.743
## omega2  335.743
## omega3  388.718
## avevar  470.070

16.8 Ανάλυση δομής (συναρτήσεις iclust, alpha, omega)

Πηγή: https://cran.r-project.org/web/packages/psychTools/vignettes/factor.pdf

iclust(my.data.frame)

## ICLUST (Item Cluster Analysis)
## Call: iclust(r.mat = my.data.frame)
## 
## Purified Alpha:
## [1] 0.46
## 
## G6* reliability:
## [1] 0.67
## 
## Original Beta:
## [1] 0.068
## 
## Cluster size:
## [1] 4
## 
## Item by Cluster Structure matrix:
##       [,1] 
## Item1  0.36
## Item2  0.45
## Item3 -0.38
## Item4  0.60
## 
## With Sums of squares of:
## [1] 0.84
## 
## Purified scale intercorrelations
##  reliabilities on diagonal
##  correlations corrected for attenuation above diagonal: 
##      [,1]
## [1,] 0.46
## 
## Cluster fit =  0.38   Pattern fit =  0.88  RMSR =  0.22
alpha(my.data.frame)
## Some items ( Item3 ) were negatively correlated with the first principal component and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
## 
## Reliability analysis   
## Call: alpha(x = my.data.frame)
## 
##   raw_alpha std.alpha G6(smc) average_r  S/N ase mean  sd median_r
##       0.24      0.18    0.38     0.052 0.22 0.3  2.3 0.6    0.074
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt    -0.83  0.24  0.76
## Duhachek -0.34  0.24  0.82
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r   S/N alpha se var.r med.r
## Item1     -0.24     -0.22    0.13    -0.065 -0.18     0.61 0.148 0.109
## Item2     -0.23     -0.62   -0.17    -0.146 -0.38     0.47 0.097 0.028
## Item3      0.43      0.47    0.42     0.228  0.89     0.24 0.041 0.202
## Item4      0.38      0.42    0.38     0.193  0.72     0.28 0.048 0.109
## 
##  Item statistics 
##        n raw.r std.r  r.cor r.drop mean   sd
## Item1 12  0.84  0.70  0.538  0.308  2.8 1.60
## Item2 12  0.74  0.81  0.781  0.511  1.2 0.83
## Item3 12  0.32  0.29 -0.048 -0.099  3.5 1.00
## Item4 12  0.18  0.34  0.048 -0.115  1.8 0.72
## 
## Non missing response frequency for each item
##          0    1    2    3    4    5 miss
## Item1 0.08 0.17 0.17 0.25 0.17 0.17    0
## Item2 0.25 0.33 0.42 0.00 0.00 0.00    0
## Item3 0.00 0.00 0.17 0.33 0.33 0.17    0
## Item4 0.00 0.33 0.50 0.17 0.00 0.00    0
omega(my.data.frame)

## Omega 
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip, 
##     digits = digits, title = title, sl = sl, labels = labels, 
##     plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option, 
##     covar = covar)
## Alpha:                 0.18 
## G.6:                   0.38 
## Omega Hierarchical:    0.26 
## Omega H asymptotic:    0.39 
## Omega Total            0.65 
## 
## Schmid Leiman Factor loadings greater than  0.2 
##          g   F1*  F2*   F3*   h2   u2   p2
## Item1 0.29       0.50 -0.28 0.42 0.58 0.21
## Item2 0.52       0.63       0.68 0.32 0.39
## Item3       0.75       0.21 0.62 0.38 0.01
## Item4 0.20 -0.77            0.67 0.33 0.06
## 
## With Sums of squares  of:
##    g  F1*  F2*  F3* 
## 0.40 1.14 0.68 0.17 
## 
## general/max  0.35   max/min =   6.7
## mean percent general =  0.17    with sd =  0.17 and cv of  1.02 
## Explained Common Variance of the general factor =  0.17 
## 
## The degrees of freedom are -3  and the fit is  0 
## The number of observations was  12  with Chi Square =  0  with prob <  NA
## The root mean square of the residuals is  0 
## The df corrected root mean square of the residuals is  NA
## 
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 2  and the fit is  0.49 
## The number of observations was  12  with Chi Square =  4  with prob <  0.14
## The root mean square of the residuals is  0.25 
## The df corrected root mean square of the residuals is  0.43 
## 
## RMSEA index =  0.276  and the 10 % confidence intervals are  0 0.736
## BIC =  -0.97 
## 
## Measures of factor score adequacy             
##                                                   g  F1*   F2*   F3*
## Correlation of scores with factors             0.54 0.87  0.68  0.53
## Multiple R square of scores with factors       0.29 0.76  0.47  0.28
## Minimum correlation of factor score estimates -0.42 0.52 -0.07 -0.45
## 
##  Total, General and Subset omega for each subset
##                                                  g  F1*  F2* F3*
## Omega total for total scores and subscales    0.65 0.08 0.68  NA
## Omega general for total scores and subscales  0.26 0.08 0.23  NA
## Omega group for total scores and subscales    0.28 0.00 0.45  NA

17 Αναγνώριση Χρονοσειράς

x=ma.sim(mu = 2, theta = -0.4, number=50)
my.lag.plot.ts(x, lag.start = 1, lag.end = 9)

my.plot.ts(x)

my.plot.ts.acf(x)

18 Προσομοίωση κατανομών (για εκπαιδευτικούς λόγους)

18.1 Προσομοίωση κατανομής χ2

N = 100
Hypothesis = c(1/2, 1/4, 1/4)
simulate_x2_homogeneity_test(Hypothesis, N)

18.2 Προσομοίωση one sample t-test

mu = 500
sdp = 3
N = 30
simulate_one_sample_t_test(mu, sdp, N)

19 Αποθήκευση διαγράμματος σε αρχείο

Ένα γράφημα τελικά βρίσκει τη θέση του σε μία σελίδα της εργασίας. Μία συνηθισμένη απαίτηση των επιστημονικών περιοδικών είναι τα διαγράμματα να έχουν μέγεθος 12×8 cm με ανάλυση τουλάχιστον 600 dpi. Η εξαγωγή ενός γραφήματος σύμφωνα με αυτές τις προδιαγραφές μπορεί να γίνει απλά εκτελώντας την εντολή png πριν τη δημιουργία του γραφήματος και την εντολή dev.off μετά. Η χρήση της εντολής png παρουσιάζεται στο παρακάτω ενδεικτικό παράδειγμα.

filename.for.plot = 'myplot.png'
png(filename.for.plot, res = 600, units = "mm", width=120, height=80)
plot_scatter(my.data.frame, agedata, examdata)
dev.off()

Η εκτέλεση του παραπάνω κώδικα θα δημιουργήσει το διάγραμμα και θα το αποθηκεύσει στο αρχείο myplot.png, από το οποίο μπορεί να μεταφερθεί στο έγγραφο επιλέγοντας στο MS Word ή στο LibreOffice Writer Εισαγωγή -> Εικόνα.

20 Αποθήκευση dataframe σε αρχείο Excel

Η εξαγωγή ενός dataframe σε αρχείο Excel μπορεί να γίνει με τις παρακάτω εντολές:

library(writexl)
write_xlsx(my.df, 'file_name.xlsx')

21 Αποθήκευση dataframe σε αρχείο SPSS

Η εξαγωγή ενός dataframe σε αρχείο SPSS μπορεί να γίνει με τις παρακάτω εντολές:

library(haven)
write_sav(my.df, 'file_name.sav')

  1. Επικοινωνία: Επαμεινώνδας Διαμαντόπουλος, Τμήμα ΗΜ/ΜΥ, Δ.Π.Θ. Email: . Τηλ: 25410 79757, 6944683327↩︎