enumerable-statistics 2.0.7 → 2.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +20 -3
- data/CHANGELOG.md +6 -0
- data/ext/enumerable/statistics/extension/statistics.c +226 -94
- data/lib/enumerable_statistics/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 815b5f1b539e61935a709b7f859aeae73dd379959d8aa5e56551b650d20c7abc
|
4
|
+
data.tar.gz: be09795e4203a023e23a20b99c2bdd051ace5292ea7c67d6000cda0c3ed7c822
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b2af05ed047b86529af90b7c18091d04055b1ded985614a6e4c7422706bd1453bb993a3b84e4b2821c8ff725efc62edb09505e1f3e59865faa71921296cd45d
|
7
|
+
data.tar.gz: 97d2f6044a4ebb1a1ca4d068f999192c3255bf8213262c6d038007404be79ca36cc6eaeefab0830e0aa45a658d4e083024ad3a116040ee75e9acbda31984b69a
|
data/.github/workflows/ci.yml
CHANGED
@@ -16,6 +16,9 @@ jobs:
|
|
16
16
|
- macos-latest
|
17
17
|
- windows-latest
|
18
18
|
ruby:
|
19
|
+
- 3.3
|
20
|
+
- 3.2
|
21
|
+
- 3.1
|
19
22
|
- 3.0
|
20
23
|
- 2.7
|
21
24
|
- 2.6
|
@@ -23,8 +26,10 @@ jobs:
|
|
23
26
|
- 2.4
|
24
27
|
- debug
|
25
28
|
exclude:
|
26
|
-
- os:
|
27
|
-
ruby:
|
29
|
+
- os: macos-latest
|
30
|
+
ruby: 2.5
|
31
|
+
- os: macos-latest
|
32
|
+
ruby: 2.4
|
28
33
|
- os: windows-latest
|
29
34
|
ruby: debug
|
30
35
|
|
@@ -36,7 +41,19 @@ jobs:
|
|
36
41
|
with:
|
37
42
|
ruby-version: ${{ matrix.ruby }}
|
38
43
|
|
39
|
-
-
|
44
|
+
- name: Detect installable bundler version
|
45
|
+
run: |
|
46
|
+
case "${{ matrix.ruby }}" in
|
47
|
+
2.7|2.6) bundler_version="2.4.22" ;;
|
48
|
+
2.5|2.4) bundler_version="2.3.27" ;;
|
49
|
+
*) bundler_version="" ;;
|
50
|
+
esac
|
51
|
+
echo "bundler_version=$bundler_version" >> $GITHUB_ENV
|
52
|
+
shell: bash
|
53
|
+
|
54
|
+
- run: gem install bundler${bundler_version:+ -v $bundler_version}
|
55
|
+
shell: bash
|
56
|
+
|
40
57
|
- run: bundle install
|
41
58
|
|
42
59
|
- run: rake --trace compile
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
# 2.0.8
|
2
|
+
|
3
|
+
- Prohibit the use of both `nbins` and `edges` kwargs simultaneously in the `histogram` method.
|
4
|
+
- Support `skip_na` kwarg in `sum` and related methods.
|
5
|
+
- Support Ruby 3.4+.
|
6
|
+
|
1
7
|
# 2.0.7
|
2
8
|
|
3
9
|
- Fix the bug of histogram with bin range that is smaller than value range
|
@@ -96,11 +96,14 @@ static VALUE half_in_rational;
|
|
96
96
|
static ID idPow, idPLUS, idMINUS, idSTAR, idDIV, idGE;
|
97
97
|
static ID id_eqeq_p, id_idiv, id_negate, id_to_f, id_cmp, id_nan_p;
|
98
98
|
static ID id_each, id_real_p, id_sum, id_population, id_closed, id_edge;
|
99
|
+
static ID id_skip_na;
|
99
100
|
|
100
|
-
static VALUE sym_auto, sym_left, sym_right;
|
101
|
+
static VALUE sym_auto, sym_left, sym_right, sym_sturges;
|
101
102
|
|
102
103
|
static VALUE cHistogram;
|
103
104
|
|
105
|
+
static VALUE orig_enum_sum, orig_ary_sum;
|
106
|
+
|
104
107
|
inline static VALUE
|
105
108
|
f_add(VALUE x, VALUE y)
|
106
109
|
{
|
@@ -135,7 +138,7 @@ complex_new(VALUE klass, VALUE real, VALUE imag)
|
|
135
138
|
{
|
136
139
|
assert(!RB_TYPE_P(real, T_COMPLEX));
|
137
140
|
|
138
|
-
|
141
|
+
VALUE obj = rb_get_alloc_func(klass)(klass);
|
139
142
|
|
140
143
|
RCOMPLEX_SET_REAL(obj, real);
|
141
144
|
RCOMPLEX_SET_IMAG(obj, imag);
|
@@ -535,9 +538,11 @@ f_gcd(VALUE x, VALUE y)
|
|
535
538
|
inline static VALUE
|
536
539
|
nurat_s_new_internal(VALUE klass, VALUE num, VALUE den)
|
537
540
|
{
|
538
|
-
|
541
|
+
VALUE obj = rb_get_alloc_func(klass)(klass);
|
542
|
+
|
539
543
|
RRATIONAL_SET_NUM(obj, num);
|
540
544
|
RRATIONAL_SET_DEN(obj, den);
|
545
|
+
|
541
546
|
return (VALUE)obj;
|
542
547
|
}
|
543
548
|
|
@@ -632,39 +637,67 @@ rb_rational_plus(VALUE self, VALUE other)
|
|
632
637
|
}
|
633
638
|
#endif
|
634
639
|
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
640
|
+
static inline int
|
641
|
+
is_na(VALUE v)
|
642
|
+
{
|
643
|
+
if (NIL_P(v))
|
644
|
+
return 1;
|
645
|
+
|
646
|
+
if (RB_FLOAT_TYPE_P(v) && isnan(RFLOAT_VALUE(v)))
|
647
|
+
return 1;
|
648
|
+
|
649
|
+
if (rb_respond_to(v, id_nan_p) && RTEST(rb_funcall(v, id_nan_p, 0)))
|
650
|
+
return 1;
|
651
|
+
|
652
|
+
return 0;
|
653
|
+
}
|
654
|
+
|
655
|
+
static int opt_skip_na(VALUE opts)
|
656
|
+
{
|
657
|
+
VALUE skip_na = Qfalse;
|
658
|
+
|
659
|
+
if (!NIL_P(opts)) {
|
660
|
+
#ifdef HAVE_RB_GET_KWARGS
|
661
|
+
ID kwargs = id_skip_na;
|
662
|
+
rb_get_kwargs(opts, &kwargs, 0, 1, &skip_na);
|
663
|
+
#else
|
664
|
+
VALUE val = rb_hash_aref(opts, ID2SYM(id_skip_na));
|
665
|
+
skip_na = NIL_P(val) ? skip_na : val;
|
666
|
+
#endif
|
667
|
+
}
|
668
|
+
|
669
|
+
return RTEST(skip_na);
|
670
|
+
}
|
671
|
+
|
672
|
+
VALUE
|
673
|
+
ary_calculate_sum(VALUE ary, VALUE init, int skip_na, long *na_count_out)
|
649
674
|
{
|
650
675
|
VALUE e, v, r;
|
651
676
|
long i, n;
|
652
677
|
int block_given;
|
653
|
-
|
654
|
-
if (rb_scan_args(argc, argv, "01", &v) == 0)
|
655
|
-
v = LONG2FIX(0);
|
678
|
+
long na_count = 0;
|
656
679
|
|
657
680
|
block_given = rb_block_given_p();
|
658
681
|
|
659
|
-
if (RARRAY_LEN(ary) == 0)
|
660
|
-
|
682
|
+
if (RARRAY_LEN(ary) == 0) {
|
683
|
+
if (na_count_out != NULL) {
|
684
|
+
*na_count_out = 0;
|
685
|
+
}
|
686
|
+
return init;
|
687
|
+
}
|
661
688
|
|
662
689
|
n = 0;
|
663
690
|
r = Qundef;
|
691
|
+
v = init;
|
664
692
|
for (i = 0; i < RARRAY_LEN(ary); i++) {
|
665
693
|
e = RARRAY_AREF(ary, i);
|
666
694
|
if (block_given)
|
667
695
|
e = rb_yield(e);
|
696
|
+
if (skip_na && is_na(e)) {
|
697
|
+
++na_count;
|
698
|
+
continue;
|
699
|
+
}
|
700
|
+
|
668
701
|
if (FIXNUM_P(e)) {
|
669
702
|
n += FIX2LONG(e); /* should not overflow long type */
|
670
703
|
if (!FIXABLE(n)) {
|
@@ -688,7 +721,7 @@ ary_sum(int argc, VALUE* argv, VALUE ary)
|
|
688
721
|
v = rb_fix_plus(LONG2FIX(n), v);
|
689
722
|
if (r != Qundef)
|
690
723
|
v = rb_rational_plus(r, v);
|
691
|
-
|
724
|
+
goto finish;
|
692
725
|
|
693
726
|
not_exact:
|
694
727
|
if (n != 0)
|
@@ -708,6 +741,11 @@ not_exact:
|
|
708
741
|
e = RARRAY_AREF(ary, i);
|
709
742
|
if (block_given)
|
710
743
|
e = rb_yield(e);
|
744
|
+
if (skip_na && is_na(e)) {
|
745
|
+
++na_count;
|
746
|
+
continue;
|
747
|
+
}
|
748
|
+
|
711
749
|
if (RB_FLOAT_TYPE_P(e))
|
712
750
|
has_float_value:
|
713
751
|
x = RFLOAT_VALUE(e);
|
@@ -725,7 +763,9 @@ not_exact:
|
|
725
763
|
c = (t - f) - y;
|
726
764
|
f = t;
|
727
765
|
}
|
728
|
-
|
766
|
+
|
767
|
+
v = DBL2NUM(f);
|
768
|
+
goto finish;
|
729
769
|
|
730
770
|
not_float:
|
731
771
|
v = DBL2NUM(f);
|
@@ -736,13 +776,53 @@ not_exact:
|
|
736
776
|
e = RARRAY_AREF(ary, i);
|
737
777
|
if (block_given)
|
738
778
|
e = rb_yield(e);
|
779
|
+
if (skip_na && is_na(e)) {
|
780
|
+
++na_count;
|
781
|
+
continue;
|
782
|
+
}
|
739
783
|
has_some_value:
|
740
784
|
v = rb_funcall(v, idPLUS, 1, e);
|
741
785
|
}
|
742
786
|
|
787
|
+
finish:
|
788
|
+
if (na_count_out != NULL) {
|
789
|
+
*na_count_out = na_count;
|
790
|
+
}
|
743
791
|
return v;
|
744
792
|
}
|
745
793
|
|
794
|
+
/* call-seq:
|
795
|
+
* ary.sum(skip_na: false)
|
796
|
+
*
|
797
|
+
* Calculate the sum of the values in `ary`.
|
798
|
+
* This method utilizes
|
799
|
+
* [Kahan summation algorithm](https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
|
800
|
+
* to compensate for the loss of precision in the result when `ary` includes Float values.
|
801
|
+
*
|
802
|
+
* Note that this library does not redefine the `sum` method introduced in Ruby 2.4.
|
803
|
+
*
|
804
|
+
* @return [Number] A summation value
|
805
|
+
*/
|
806
|
+
static VALUE
|
807
|
+
ary_sum(int argc, VALUE* argv, VALUE ary)
|
808
|
+
{
|
809
|
+
VALUE v, opts;
|
810
|
+
int skip_na;
|
811
|
+
|
812
|
+
if (rb_scan_args(argc, argv, "01:", &v, &opts) == 0) {
|
813
|
+
v = LONG2FIX(0);
|
814
|
+
}
|
815
|
+
skip_na = opt_skip_na(opts);
|
816
|
+
|
817
|
+
#ifndef HAVE_ENUM_SUM
|
818
|
+
if (!skip_na) {
|
819
|
+
return rb_funcall(orig_ary_sum, rb_intern("call"), argc, &v);
|
820
|
+
}
|
821
|
+
#endif
|
822
|
+
|
823
|
+
return ary_calculate_sum(ary, v, skip_na, NULL);
|
824
|
+
}
|
825
|
+
|
746
826
|
static void
|
747
827
|
calculate_and_set_mean(VALUE *mean_ptr, VALUE sum, long const n)
|
748
828
|
{
|
@@ -771,9 +851,10 @@ calculate_and_set_mean(VALUE *mean_ptr, VALUE sum, long const n)
|
|
771
851
|
}
|
772
852
|
|
773
853
|
static void
|
774
|
-
ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
|
854
|
+
ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof, int skip_na)
|
775
855
|
{
|
776
856
|
long i;
|
857
|
+
long na_count;
|
777
858
|
size_t n = 0;
|
778
859
|
double m = 0.0, m2 = 0.0, f = 0.0, c = 0.0;
|
779
860
|
|
@@ -797,8 +878,8 @@ ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
|
|
797
878
|
|
798
879
|
if (variance_ptr == NULL) {
|
799
880
|
VALUE init = DBL2NUM(0.0);
|
800
|
-
VALUE const sum =
|
801
|
-
long const n = RARRAY_LEN(ary);
|
881
|
+
VALUE const sum = ary_calculate_sum(ary, init, skip_na, &na_count);
|
882
|
+
long const n = RARRAY_LEN(ary) - na_count;
|
802
883
|
calculate_and_set_mean(mean_ptr, sum, n);
|
803
884
|
return;
|
804
885
|
}
|
@@ -839,26 +920,46 @@ ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
|
|
839
920
|
}
|
840
921
|
}
|
841
922
|
|
842
|
-
|
843
|
-
|
923
|
+
struct variance_opts {
|
924
|
+
int population;
|
925
|
+
int skip_na;
|
926
|
+
};
|
927
|
+
|
928
|
+
static void
|
929
|
+
get_variance_opts(VALUE opts, struct variance_opts *out)
|
844
930
|
{
|
845
|
-
|
931
|
+
assert(out != NULL);
|
932
|
+
|
933
|
+
out->population = 0;
|
934
|
+
out->skip_na = 0;
|
846
935
|
|
847
936
|
if (!NIL_P(opts)) {
|
848
937
|
#ifdef HAVE_RB_GET_KWARGS
|
849
|
-
ID
|
850
|
-
|
938
|
+
static ID kwarg_keys[2];
|
939
|
+
VALUE kwarg_vals;
|
940
|
+
|
941
|
+
if (!kwarg_keys[0]) {
|
942
|
+
kwarg_keys[0] = id_population;
|
943
|
+
kwarg_keys[1] = id_skip_na;
|
944
|
+
}
|
945
|
+
|
946
|
+
rb_get_kwargs(opts, &kwarg_keys, 0, 2, kwarg_vals);
|
947
|
+
out->population = (kwarg_vals[0] != Qundef) ? RTEST(kwarg_vals[0]) : out->population;
|
948
|
+
out->skip_na = (kwarg_vals[1] != Qundef) ? RTEST(kwarg_vals[1]) : out->skip_na;
|
851
949
|
#else
|
852
|
-
VALUE val
|
853
|
-
|
950
|
+
VALUE val;
|
951
|
+
|
952
|
+
val = rb_hash_aref(opts, ID2SYM(id_population));
|
953
|
+
out->population = NIL_P(val) ? out->population : RTEST(val);
|
954
|
+
|
955
|
+
val = rb_hash_aref(opts, ID2SYM(id_skip_na));
|
956
|
+
out->skip_na = NIL_P(val) ? out->skip_na : RTEST(val);
|
854
957
|
#endif
|
855
958
|
}
|
856
|
-
|
857
|
-
return RTEST(population);
|
858
959
|
}
|
859
960
|
|
860
961
|
/* call-seq:
|
861
|
-
* ary.mean_variance(population: false)
|
962
|
+
* ary.mean_variance(population: false, skip_na: false)
|
862
963
|
*
|
863
964
|
* Calculate a mean and a variance of the values in `ary`.
|
864
965
|
* The first element of the result array is the mean, and the second is the variance.
|
@@ -876,19 +977,21 @@ opt_population_p(VALUE opts)
|
|
876
977
|
static VALUE
|
877
978
|
ary_mean_variance_m(int argc, VALUE* argv, VALUE ary)
|
878
979
|
{
|
879
|
-
|
980
|
+
struct variance_opts options;
|
981
|
+
VALUE opts, mean = Qnil, variance = Qnil;
|
880
982
|
size_t ddof = 1;
|
881
983
|
|
882
984
|
rb_scan_args(argc, argv, "0:", &opts);
|
883
|
-
|
985
|
+
get_variance_opts(opts, &options);
|
986
|
+
if (options.population)
|
884
987
|
ddof = 0;
|
885
988
|
|
886
|
-
ary_mean_variance(ary, &mean, &variance, ddof);
|
989
|
+
ary_mean_variance(ary, &mean, &variance, ddof, options.skip_na);
|
887
990
|
return rb_assoc_new(mean, variance);
|
888
991
|
}
|
889
992
|
|
890
993
|
/* call-seq:
|
891
|
-
* ary.mean
|
994
|
+
* ary.mean(skip_na: false)
|
892
995
|
*
|
893
996
|
* Calculate a mean of the values in `ary`.
|
894
997
|
* This method utilizes
|
@@ -898,15 +1001,20 @@ ary_mean_variance_m(int argc, VALUE* argv, VALUE ary)
|
|
898
1001
|
* @return [Number] A mean value
|
899
1002
|
*/
|
900
1003
|
static VALUE
|
901
|
-
ary_mean(VALUE ary)
|
1004
|
+
ary_mean(int argc, VALUE *argv, VALUE ary)
|
902
1005
|
{
|
903
|
-
VALUE mean;
|
904
|
-
|
1006
|
+
VALUE mean = Qnil, opts;
|
1007
|
+
int skip_na;
|
1008
|
+
|
1009
|
+
rb_scan_args(argc, argv, ":", &opts);
|
1010
|
+
skip_na = opt_skip_na(opts);
|
1011
|
+
|
1012
|
+
ary_mean_variance(ary, &mean, NULL, 1, skip_na);
|
905
1013
|
return mean;
|
906
1014
|
}
|
907
1015
|
|
908
1016
|
/* call-seq:
|
909
|
-
* ary.variance(population: false)
|
1017
|
+
* ary.variance(population: false, skip_na: false)
|
910
1018
|
*
|
911
1019
|
* Calculate a variance of the values in `ary`.
|
912
1020
|
* This method scan values in `ary` only once,
|
@@ -922,14 +1030,16 @@ ary_mean(VALUE ary)
|
|
922
1030
|
static VALUE
|
923
1031
|
ary_variance(int argc, VALUE* argv, VALUE ary)
|
924
1032
|
{
|
1033
|
+
struct variance_opts options;
|
925
1034
|
VALUE opts, variance;
|
926
1035
|
size_t ddof = 1;
|
927
1036
|
|
928
1037
|
rb_scan_args(argc, argv, "0:", &opts);
|
929
|
-
|
1038
|
+
get_variance_opts(opts, &options);
|
1039
|
+
if (options.population)
|
930
1040
|
ddof = 0;
|
931
1041
|
|
932
|
-
ary_mean_variance(ary, NULL, &variance, ddof);
|
1042
|
+
ary_mean_variance(ary, NULL, &variance, ddof, options.skip_na);
|
933
1043
|
return variance;
|
934
1044
|
}
|
935
1045
|
|
@@ -943,6 +1053,7 @@ struct enum_sum_memo {
|
|
943
1053
|
double f, c;
|
944
1054
|
int block_given;
|
945
1055
|
int float_value;
|
1056
|
+
int skip_na;
|
946
1057
|
};
|
947
1058
|
|
948
1059
|
static void
|
@@ -956,8 +1067,12 @@ sum_iter(VALUE e, struct enum_sum_memo *memo)
|
|
956
1067
|
double f = memo->f;
|
957
1068
|
double c = memo->c;
|
958
1069
|
|
959
|
-
if (memo->block_given)
|
1070
|
+
if (memo->block_given) {
|
960
1071
|
e = rb_yield(e);
|
1072
|
+
}
|
1073
|
+
if (memo->skip_na && is_na(e)) {
|
1074
|
+
return;
|
1075
|
+
}
|
961
1076
|
|
962
1077
|
memo->count += 1;
|
963
1078
|
|
@@ -1090,7 +1205,7 @@ int_range_sum_count(VALUE beg, VALUE end, int excl,
|
|
1090
1205
|
}
|
1091
1206
|
|
1092
1207
|
static void
|
1093
|
-
enum_sum_count(VALUE obj, VALUE init, VALUE *sum_ptr, long *count_ptr)
|
1208
|
+
enum_sum_count(VALUE obj, VALUE init, int skip_na, VALUE *sum_ptr, long *count_ptr)
|
1094
1209
|
{
|
1095
1210
|
struct enum_sum_memo memo;
|
1096
1211
|
VALUE beg, end;
|
@@ -1101,6 +1216,7 @@ enum_sum_count(VALUE obj, VALUE init, VALUE *sum_ptr, long *count_ptr)
|
|
1101
1216
|
memo.block_given = rb_block_given_p();
|
1102
1217
|
memo.n = 0;
|
1103
1218
|
memo.r = Qundef;
|
1219
|
+
memo.skip_na = skip_na;
|
1104
1220
|
|
1105
1221
|
if ((memo.float_value = RB_FLOAT_TYPE_P(memo.v))) {
|
1106
1222
|
memo.f = RFLOAT_VALUE(memo.v);
|
@@ -1138,9 +1254,8 @@ enum_sum_count(VALUE obj, VALUE init, VALUE *sum_ptr, long *count_ptr)
|
|
1138
1254
|
*count_ptr = memo.count;
|
1139
1255
|
}
|
1140
1256
|
|
1141
|
-
#ifndef HAVE_ENUM_SUM
|
1142
1257
|
/* call-seq:
|
1143
|
-
* enum.sum
|
1258
|
+
* enum.sum(skip_na: false)
|
1144
1259
|
*
|
1145
1260
|
* Calculate the sum of the values in `enum`.
|
1146
1261
|
* This method utilizes
|
@@ -1154,16 +1269,27 @@ enum_sum_count(VALUE obj, VALUE init, VALUE *sum_ptr, long *count_ptr)
|
|
1154
1269
|
static VALUE
|
1155
1270
|
enum_sum(int argc, VALUE* argv, VALUE obj)
|
1156
1271
|
{
|
1157
|
-
VALUE sum, init;
|
1272
|
+
VALUE sum, init, opts;
|
1273
|
+
int skip_na;
|
1158
1274
|
|
1159
|
-
if (rb_scan_args(argc, argv, "01", &init) == 0)
|
1275
|
+
if (rb_scan_args(argc, argv, "01:", &init, &opts) == 0) {
|
1160
1276
|
init = LONG2FIX(0);
|
1277
|
+
}
|
1278
|
+
skip_na = opt_skip_na(opts);
|
1161
1279
|
|
1162
|
-
|
1280
|
+
#ifndef HAVE_ENUM_SUM
|
1281
|
+
if (skip_na) {
|
1282
|
+
enum_sum_count(obj, init, skip_na, &sum, NULL);
|
1283
|
+
}
|
1284
|
+
else {
|
1285
|
+
rb_funcall(orig_enum_sum, rb_intern("call"), argc, &init);
|
1286
|
+
}
|
1287
|
+
#else
|
1288
|
+
enum_sum_count(obj, init, skip_na, &sum, NULL);
|
1289
|
+
#endif
|
1163
1290
|
|
1164
1291
|
return sum;
|
1165
1292
|
}
|
1166
|
-
#endif
|
1167
1293
|
|
1168
1294
|
struct enum_mean_variance_memo {
|
1169
1295
|
int block_given;
|
@@ -1253,7 +1379,7 @@ enum_mean_variance(VALUE obj, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
|
|
1253
1379
|
long n;
|
1254
1380
|
VALUE sum;
|
1255
1381
|
VALUE init = DBL2NUM(0.0);
|
1256
|
-
enum_sum_count(obj, init, &sum, &n);
|
1382
|
+
enum_sum_count(obj, init, 0, &sum, &n); /* TODO: skip_na */
|
1257
1383
|
if (n > 0)
|
1258
1384
|
calculate_and_set_mean(mean_ptr, sum, n);
|
1259
1385
|
return;
|
@@ -1303,11 +1429,13 @@ enum_mean_variance(VALUE obj, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
|
|
1303
1429
|
static VALUE
|
1304
1430
|
enum_mean_variance_m(int argc, VALUE* argv, VALUE obj)
|
1305
1431
|
{
|
1432
|
+
struct variance_opts options;
|
1306
1433
|
VALUE opts, mean, variance;
|
1307
1434
|
size_t ddof = 1;
|
1308
1435
|
|
1309
1436
|
rb_scan_args(argc, argv, "0:", &opts);
|
1310
|
-
|
1437
|
+
get_variance_opts(opts, &options);
|
1438
|
+
if (options.population)
|
1311
1439
|
ddof = 0;
|
1312
1440
|
|
1313
1441
|
enum_mean_variance(obj, &mean, &variance, ddof);
|
@@ -1349,11 +1477,13 @@ enum_mean(VALUE obj)
|
|
1349
1477
|
static VALUE
|
1350
1478
|
enum_variance(int argc, VALUE* argv, VALUE obj)
|
1351
1479
|
{
|
1480
|
+
struct variance_opts options;
|
1352
1481
|
VALUE opts, variance;
|
1353
1482
|
size_t ddof = 1;
|
1354
1483
|
|
1355
1484
|
rb_scan_args(argc, argv, "0:", &opts);
|
1356
|
-
|
1485
|
+
get_variance_opts(opts, &options);
|
1486
|
+
if (options.population)
|
1357
1487
|
ddof = 0;
|
1358
1488
|
|
1359
1489
|
enum_mean_variance(obj, NULL, &variance, ddof);
|
@@ -1392,11 +1522,13 @@ sqrt_value(VALUE x)
|
|
1392
1522
|
static VALUE
|
1393
1523
|
enum_mean_stdev(int argc, VALUE* argv, VALUE obj)
|
1394
1524
|
{
|
1525
|
+
struct variance_opts options;
|
1395
1526
|
VALUE opts, mean, variance;
|
1396
1527
|
size_t ddof = 1;
|
1397
1528
|
|
1398
1529
|
rb_scan_args(argc, argv, "0:", &opts);
|
1399
|
-
|
1530
|
+
get_variance_opts(opts, &options);
|
1531
|
+
if (options.population)
|
1400
1532
|
ddof = 0;
|
1401
1533
|
|
1402
1534
|
enum_mean_variance(obj, &mean, &variance, ddof);
|
@@ -1446,14 +1578,16 @@ enum_stdev(int argc, VALUE* argv, VALUE obj)
|
|
1446
1578
|
static VALUE
|
1447
1579
|
ary_mean_stdev(int argc, VALUE* argv, VALUE ary)
|
1448
1580
|
{
|
1581
|
+
struct variance_opts options;
|
1449
1582
|
VALUE opts, mean, variance;
|
1450
1583
|
size_t ddof = 1;
|
1451
1584
|
|
1452
1585
|
rb_scan_args(argc, argv, "0:", &opts);
|
1453
|
-
|
1586
|
+
get_variance_opts(opts, &options);
|
1587
|
+
if (options.population)
|
1454
1588
|
ddof = 0;
|
1455
1589
|
|
1456
|
-
ary_mean_variance(ary, &mean, &variance, ddof);
|
1590
|
+
ary_mean_variance(ary, &mean, &variance, ddof, options.skip_na);
|
1457
1591
|
VALUE stdev = sqrt_value(variance);
|
1458
1592
|
return rb_assoc_new(mean, stdev);
|
1459
1593
|
}
|
@@ -1479,21 +1613,6 @@ ary_stdev(int argc, VALUE* argv, VALUE ary)
|
|
1479
1613
|
return stdev;
|
1480
1614
|
}
|
1481
1615
|
|
1482
|
-
static inline int
|
1483
|
-
is_na(VALUE v)
|
1484
|
-
{
|
1485
|
-
if (NIL_P(v))
|
1486
|
-
return 1;
|
1487
|
-
|
1488
|
-
if (RB_FLOAT_TYPE_P(v) && isnan(RFLOAT_VALUE(v)))
|
1489
|
-
return 1;
|
1490
|
-
|
1491
|
-
if (rb_respond_to(v, id_nan_p) && RTEST(rb_funcall(v, id_nan_p, 0)))
|
1492
|
-
return 1;
|
1493
|
-
|
1494
|
-
return 0;
|
1495
|
-
}
|
1496
|
-
|
1497
1616
|
static int
|
1498
1617
|
ary_percentile_sort_cmp(const void *ap, const void *bp, void *dummy)
|
1499
1618
|
{
|
@@ -1900,7 +2019,7 @@ any_value_counts(int argc, VALUE *argv, VALUE obj,
|
|
1900
2019
|
struct value_counts_opts opts;
|
1901
2020
|
struct value_counts_memo memo;
|
1902
2021
|
|
1903
|
-
rb_scan_args(argc, argv, ":", &kwargs);
|
2022
|
+
rb_scan_args(argc, argv, "0:", &kwargs);
|
1904
2023
|
value_counts_extract_opts(kwargs, &opts);
|
1905
2024
|
|
1906
2025
|
memo.result = rb_hash_new();
|
@@ -2255,9 +2374,9 @@ ary_histogram_calculate_edge_lo_hi(const double lo, const double hi, const long
|
|
2255
2374
|
}
|
2256
2375
|
|
2257
2376
|
static VALUE
|
2258
|
-
ary_histogram_calculate_edge(VALUE ary,
|
2377
|
+
ary_histogram_calculate_edge(VALUE ary, VALUE arg0, const int left_p)
|
2259
2378
|
{
|
2260
|
-
long n;
|
2379
|
+
long n, nbins;
|
2261
2380
|
VALUE minmax;
|
2262
2381
|
VALUE edge = Qnil;
|
2263
2382
|
double lo, hi;
|
@@ -2265,6 +2384,22 @@ ary_histogram_calculate_edge(VALUE ary, const long nbins, const int left_p)
|
|
2265
2384
|
Check_Type(ary, T_ARRAY);
|
2266
2385
|
n = RARRAY_LEN(ary);
|
2267
2386
|
|
2387
|
+
if (NIL_P(arg0)) {
|
2388
|
+
arg0 = sym_auto;
|
2389
|
+
}
|
2390
|
+
|
2391
|
+
if (RB_TYPE_P(arg0, T_SYMBOL)) {
|
2392
|
+
if (arg0 != sym_auto && arg0 != sym_sturges) {
|
2393
|
+
rb_raise(rb_eArgError, "Unknown method to calculate bin width: %+"PRIsVALUE, arg0);
|
2394
|
+
}
|
2395
|
+
else {
|
2396
|
+
nbins = sturges(n);
|
2397
|
+
}
|
2398
|
+
}
|
2399
|
+
else {
|
2400
|
+
nbins = NUM2LONG(arg0);
|
2401
|
+
}
|
2402
|
+
|
2268
2403
|
if (n == 0 && nbins < 0) {
|
2269
2404
|
rb_raise(rb_eArgError, "nbins must be >= 0 for an empty array, got %ld", nbins);
|
2270
2405
|
}
|
@@ -2337,19 +2472,13 @@ static VALUE
|
|
2337
2472
|
ary_histogram(int argc, VALUE *argv, VALUE ary)
|
2338
2473
|
{
|
2339
2474
|
VALUE arg0, kwargs, bin_weights;
|
2340
|
-
long
|
2475
|
+
long n_bin_weights, i;
|
2341
2476
|
|
2342
2477
|
VALUE weight_array = Qnil;
|
2343
2478
|
VALUE edges = Qnil;
|
2344
2479
|
int left_p = 1;
|
2345
2480
|
|
2346
2481
|
rb_scan_args(argc, argv, "01:", &arg0, &kwargs);
|
2347
|
-
if (NIL_P(arg0) || arg0 == sym_auto) {
|
2348
|
-
nbins = sturges(RARRAY_LEN(ary));
|
2349
|
-
}
|
2350
|
-
else {
|
2351
|
-
nbins = NUM2LONG(arg0);
|
2352
|
-
}
|
2353
2482
|
|
2354
2483
|
if (!NIL_P(kwargs)) {
|
2355
2484
|
enum { kw_weights, kw_edges, kw_closed };
|
@@ -2370,7 +2499,10 @@ ary_histogram(int argc, VALUE *argv, VALUE ary)
|
|
2370
2499
|
}
|
2371
2500
|
|
2372
2501
|
if (NIL_P(edges)) {
|
2373
|
-
edges = ary_histogram_calculate_edge(ary,
|
2502
|
+
edges = ary_histogram_calculate_edge(ary, arg0, left_p);
|
2503
|
+
}
|
2504
|
+
else if (! NIL_P(arg0)) {
|
2505
|
+
rb_raise(rb_eArgError, "Unable to use both `nbins` and `edges` together");
|
2374
2506
|
}
|
2375
2507
|
|
2376
2508
|
n_bin_weights = RARRAY_LEN(edges) - 1;
|
@@ -2395,10 +2527,12 @@ Init_extension(void)
|
|
2395
2527
|
rb_ext_ractor_safe(true);
|
2396
2528
|
#endif
|
2397
2529
|
|
2398
|
-
|
2399
|
-
rb_define_method(rb_mEnumerable, "sum", enum_sum, -1);
|
2400
|
-
#endif
|
2530
|
+
mEnumerableStatistics = rb_const_get_at(rb_cObject, rb_intern("EnumerableStatistics"));
|
2401
2531
|
|
2532
|
+
orig_enum_sum = rb_funcall(rb_mEnumerable, rb_intern("public_instance_method"), 1, rb_str_new_cstr("sum"));
|
2533
|
+
orig_ary_sum = rb_funcall(rb_cArray, rb_intern("public_instance_method"), 1, rb_str_new_cstr("sum"));
|
2534
|
+
|
2535
|
+
rb_define_method(rb_mEnumerable, "sum", enum_sum, -1);
|
2402
2536
|
rb_define_method(rb_mEnumerable, "mean_variance", enum_mean_variance_m, -1);
|
2403
2537
|
rb_define_method(rb_mEnumerable, "mean", enum_mean, 0);
|
2404
2538
|
rb_define_method(rb_mEnumerable, "variance", enum_variance, -1);
|
@@ -2406,11 +2540,9 @@ Init_extension(void)
|
|
2406
2540
|
rb_define_method(rb_mEnumerable, "stdev", enum_stdev, -1);
|
2407
2541
|
rb_define_method(rb_mEnumerable, "value_counts", enum_value_counts, -1);
|
2408
2542
|
|
2409
|
-
#ifndef HAVE_ARRAY_SUM
|
2410
2543
|
rb_define_method(rb_cArray, "sum", ary_sum, -1);
|
2411
|
-
#endif
|
2412
2544
|
rb_define_method(rb_cArray, "mean_variance", ary_mean_variance_m, -1);
|
2413
|
-
rb_define_method(rb_cArray, "mean", ary_mean,
|
2545
|
+
rb_define_method(rb_cArray, "mean", ary_mean, -1);
|
2414
2546
|
rb_define_method(rb_cArray, "variance", ary_variance, -1);
|
2415
2547
|
rb_define_method(rb_cArray, "mean_stdev", ary_mean_stdev, -1);
|
2416
2548
|
rb_define_method(rb_cArray, "stdev", ary_stdev, -1);
|
@@ -2423,7 +2555,6 @@ Init_extension(void)
|
|
2423
2555
|
half_in_rational = nurat_s_new_internal(rb_cRational, INT2FIX(1), INT2FIX(2));
|
2424
2556
|
rb_gc_register_mark_object(half_in_rational);
|
2425
2557
|
|
2426
|
-
mEnumerableStatistics = rb_const_get_at(rb_cObject, rb_intern("EnumerableStatistics"));
|
2427
2558
|
cHistogram = rb_const_get_at(mEnumerableStatistics, rb_intern("Histogram"));
|
2428
2559
|
|
2429
2560
|
rb_define_method(rb_cArray, "histogram", ary_histogram, -1);
|
@@ -2449,6 +2580,7 @@ Init_extension(void)
|
|
2449
2580
|
id_population = rb_intern("population");
|
2450
2581
|
id_closed = rb_intern("closed");
|
2451
2582
|
id_edge = rb_intern("edge");
|
2583
|
+
id_skip_na = rb_intern("skip_na");
|
2452
2584
|
|
2453
2585
|
sym_auto = ID2SYM(rb_intern("auto"));
|
2454
2586
|
sym_left = ID2SYM(rb_intern("left"));
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: enumerable-statistics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kenta Murata
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-05-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -289,7 +289,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
289
289
|
- !ruby/object:Gem::Version
|
290
290
|
version: '0'
|
291
291
|
requirements: []
|
292
|
-
rubygems_version: 3.
|
292
|
+
rubygems_version: 3.5.9
|
293
293
|
signing_key:
|
294
294
|
specification_version: 4
|
295
295
|
summary: Statistics features for Enumerable
|