123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981 |
- // Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au)
- // Copyright 2008-2016 National ICT Australia (NICTA)
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- // ------------------------------------------------------------------------
- //! \addtogroup fn_accu
- //! @{
- template<typename T1>
- arma_hot
- inline
- typename T1::elem_type
- accu_proxy_linear(const Proxy<T1>& P)
- {
- arma_extra_debug_sigprint();
-
- typedef typename T1::elem_type eT;
-
- eT val = eT(0);
-
- typename Proxy<T1>::ea_type Pea = P.get_ea();
-
- const uword n_elem = P.get_n_elem();
-
- if( arma_config::openmp && Proxy<T1>::use_mp && mp_gate<eT>::eval(n_elem) )
- {
- #if defined(ARMA_USE_OPENMP)
- {
- // NOTE: using parallelisation with manual reduction workaround to take into account complex numbers;
- // NOTE: OpenMP versions lower than 4.0 do not support user-defined reduction
-
- const int n_threads_max = mp_thread_limit::get();
- const uword n_threads_use = (std::min)(uword(podarray_prealloc_n_elem::val), uword(n_threads_max));
- const uword chunk_size = n_elem / n_threads_use;
-
- podarray<eT> partial_accs(n_threads_use);
-
- #pragma omp parallel for schedule(static) num_threads(int(n_threads_use))
- for(uword thread_id=0; thread_id < n_threads_use; ++thread_id)
- {
- const uword start = (thread_id+0) * chunk_size;
- const uword endp1 = (thread_id+1) * chunk_size;
-
- eT acc = eT(0);
- for(uword i=start; i < endp1; ++i) { acc += Pea[i]; }
-
- partial_accs[thread_id] = acc;
- }
-
- for(uword thread_id=0; thread_id < n_threads_use; ++thread_id) { val += partial_accs[thread_id]; }
-
- for(uword i=(n_threads_use*chunk_size); i < n_elem; ++i) { val += Pea[i]; }
- }
- #endif
- }
- else
- {
- #if defined(__FINITE_MATH_ONLY__) && (__FINITE_MATH_ONLY__ > 0)
- {
- if(P.is_aligned())
- {
- typename Proxy<T1>::aligned_ea_type Pea_aligned = P.get_aligned_ea();
-
- for(uword i=0; i<n_elem; ++i) { val += Pea_aligned.at_alt(i); }
- }
- else
- {
- for(uword i=0; i<n_elem; ++i) { val += Pea[i]; }
- }
- }
- #else
- {
- eT val1 = eT(0);
- eT val2 = eT(0);
-
- uword i,j;
- for(i=0, j=1; j < n_elem; i+=2, j+=2) { val1 += Pea[i]; val2 += Pea[j]; }
-
- if(i < n_elem) { val1 += Pea[i]; }
-
- val = val1 + val2;
- }
- #endif
- }
-
- return val;
- }
- template<typename T1>
- arma_hot
- inline
- typename T1::elem_type
- accu_proxy_at_mp(const Proxy<T1>& P)
- {
- arma_extra_debug_sigprint();
-
- typedef typename T1::elem_type eT;
-
- eT val = eT(0);
-
- #if defined(ARMA_USE_OPENMP)
- {
- const uword n_rows = P.get_n_rows();
- const uword n_cols = P.get_n_cols();
-
- if(n_cols == 1)
- {
- const int n_threads_max = mp_thread_limit::get();
- const uword n_threads_use = (std::min)(uword(podarray_prealloc_n_elem::val), uword(n_threads_max));
- const uword chunk_size = n_rows / n_threads_use;
-
- podarray<eT> partial_accs(n_threads_use);
-
- #pragma omp parallel for schedule(static) num_threads(int(n_threads_use))
- for(uword thread_id=0; thread_id < n_threads_use; ++thread_id)
- {
- const uword start = (thread_id+0) * chunk_size;
- const uword endp1 = (thread_id+1) * chunk_size;
-
- eT acc = eT(0);
- for(uword i=start; i < endp1; ++i) { acc += P.at(i,0); }
-
- partial_accs[thread_id] = acc;
- }
-
- for(uword thread_id=0; thread_id < n_threads_use; ++thread_id) { val += partial_accs[thread_id]; }
-
- for(uword i=(n_threads_use*chunk_size); i < n_rows; ++i) { val += P.at(i,0); }
- }
- else
- if(n_rows == 1)
- {
- const int n_threads_max = mp_thread_limit::get();
- const uword n_threads_use = (std::min)(uword(podarray_prealloc_n_elem::val), uword(n_threads_max));
- const uword chunk_size = n_cols / n_threads_use;
-
- podarray<eT> partial_accs(n_threads_use);
-
- #pragma omp parallel for schedule(static) num_threads(int(n_threads_use))
- for(uword thread_id=0; thread_id < n_threads_use; ++thread_id)
- {
- const uword start = (thread_id+0) * chunk_size;
- const uword endp1 = (thread_id+1) * chunk_size;
-
- eT acc = eT(0);
- for(uword i=start; i < endp1; ++i) { acc += P.at(0,i); }
-
- partial_accs[thread_id] = acc;
- }
-
- for(uword thread_id=0; thread_id < n_threads_use; ++thread_id) { val += partial_accs[thread_id]; }
-
- for(uword i=(n_threads_use*chunk_size); i < n_cols; ++i) { val += P.at(0,i); }
- }
- else
- {
- podarray<eT> col_accs(n_cols);
-
- const int n_threads = mp_thread_limit::get();
-
- #pragma omp parallel for schedule(static) num_threads(n_threads)
- for(uword col=0; col < n_cols; ++col)
- {
- eT val1 = eT(0);
- eT val2 = eT(0);
-
- uword i,j;
- for(i=0, j=1; j < n_rows; i+=2, j+=2) { val1 += P.at(i,col); val2 += P.at(j,col); }
-
- if(i < n_rows) { val1 += P.at(i,col); }
-
- col_accs[col] = val1 + val2;
- }
-
- val = arrayops::accumulate(col_accs.memptr(), n_cols);
- }
- }
- #else
- {
- arma_ignore(P);
- }
- #endif
-
- return val;
- }
- template<typename T1>
- arma_hot
- inline
- typename T1::elem_type
- accu_proxy_at(const Proxy<T1>& P)
- {
- arma_extra_debug_sigprint();
-
- typedef typename T1::elem_type eT;
-
- if(arma_config::openmp && Proxy<T1>::use_mp && mp_gate<eT>::eval(P.get_n_elem()))
- {
- return accu_proxy_at_mp(P);
- }
-
- const uword n_rows = P.get_n_rows();
- const uword n_cols = P.get_n_cols();
-
- eT val = eT(0);
-
- if(n_rows != 1)
- {
- eT val1 = eT(0);
- eT val2 = eT(0);
-
- for(uword col=0; col < n_cols; ++col)
- {
- uword i,j;
- for(i=0, j=1; j < n_rows; i+=2, j+=2) { val1 += P.at(i,col); val2 += P.at(j,col); }
-
- if(i < n_rows) { val1 += P.at(i,col); }
- }
-
- val = val1 + val2;
- }
- else
- {
- for(uword col=0; col < n_cols; ++col) { val += P.at(0,col); }
- }
-
- return val;
- }
- //! accumulate the elements of a matrix
- template<typename T1>
- arma_warn_unused
- arma_hot
- inline
- typename enable_if2< is_arma_type<T1>::value, typename T1::elem_type >::result
- accu(const T1& X)
- {
- arma_extra_debug_sigprint();
-
- const Proxy<T1> P(X);
-
- if(is_Mat<typename Proxy<T1>::stored_type>::value || is_subview_col<typename Proxy<T1>::stored_type>::value)
- {
- const quasi_unwrap<typename Proxy<T1>::stored_type> tmp(P.Q);
-
- return arrayops::accumulate(tmp.M.memptr(), tmp.M.n_elem);
- }
-
- return (Proxy<T1>::use_at) ? accu_proxy_at(P) : accu_proxy_linear(P);
- }
- //! explicit handling of multiply-and-accumulate
- template<typename T1, typename T2>
- arma_warn_unused
- inline
- typename T1::elem_type
- accu(const eGlue<T1,T2,eglue_schur>& expr)
- {
- arma_extra_debug_sigprint();
-
- typedef eGlue<T1,T2,eglue_schur> expr_type;
-
- typedef typename expr_type::proxy1_type::stored_type P1_stored_type;
- typedef typename expr_type::proxy2_type::stored_type P2_stored_type;
-
- const bool have_direct_mem_1 = (is_Mat<P1_stored_type>::value) || (is_subview_col<P1_stored_type>::value);
- const bool have_direct_mem_2 = (is_Mat<P2_stored_type>::value) || (is_subview_col<P2_stored_type>::value);
-
- if(have_direct_mem_1 && have_direct_mem_2)
- {
- const quasi_unwrap<P1_stored_type> tmp1(expr.P1.Q);
- const quasi_unwrap<P2_stored_type> tmp2(expr.P2.Q);
-
- return op_dot::direct_dot(tmp1.M.n_elem, tmp1.M.memptr(), tmp2.M.memptr());
- }
-
- const Proxy<expr_type> P(expr);
-
- return (Proxy<expr_type>::use_at) ? accu_proxy_at(P) : accu_proxy_linear(P);
- }
- //! explicit handling of Hamming norm (also known as zero norm)
- template<typename T1>
- arma_warn_unused
- inline
- uword
- accu(const mtOp<uword,T1,op_rel_noteq>& X)
- {
- arma_extra_debug_sigprint();
-
- typedef typename T1::elem_type eT;
-
- const eT val = X.aux;
-
- const Proxy<T1> P(X.m);
-
- uword n_nonzero = 0;
-
- if(Proxy<T1>::use_at == false)
- {
- typedef typename Proxy<T1>::ea_type ea_type;
-
- ea_type A = P.get_ea();
- const uword n_elem = P.get_n_elem();
-
- for(uword i=0; i<n_elem; ++i)
- {
- n_nonzero += (A[i] != val) ? uword(1) : uword(0);
- }
- }
- else
- {
- const uword P_n_cols = P.get_n_cols();
- const uword P_n_rows = P.get_n_rows();
-
- if(P_n_rows == 1)
- {
- for(uword col=0; col < P_n_cols; ++col)
- {
- n_nonzero += (P.at(0,col) != val) ? uword(1) : uword(0);
- }
- }
- else
- {
- for(uword col=0; col < P_n_cols; ++col)
- for(uword row=0; row < P_n_rows; ++row)
- {
- n_nonzero += (P.at(row,col) != val) ? uword(1) : uword(0);
- }
- }
- }
-
- return n_nonzero;
- }
- template<typename T1>
- arma_warn_unused
- inline
- uword
- accu(const mtOp<uword,T1,op_rel_eq>& X)
- {
- arma_extra_debug_sigprint();
-
- typedef typename T1::elem_type eT;
-
- const eT val = X.aux;
-
- const Proxy<T1> P(X.m);
-
- uword n_nonzero = 0;
-
- if(Proxy<T1>::use_at == false)
- {
- typedef typename Proxy<T1>::ea_type ea_type;
-
- ea_type A = P.get_ea();
- const uword n_elem = P.get_n_elem();
-
- for(uword i=0; i<n_elem; ++i)
- {
- n_nonzero += (A[i] == val) ? uword(1) : uword(0);
- }
- }
- else
- {
- const uword P_n_cols = P.get_n_cols();
- const uword P_n_rows = P.get_n_rows();
-
- if(P_n_rows == 1)
- {
- for(uword col=0; col < P_n_cols; ++col)
- {
- n_nonzero += (P.at(0,col) == val) ? uword(1) : uword(0);
- }
- }
- else
- {
- for(uword col=0; col < P_n_cols; ++col)
- for(uword row=0; row < P_n_rows; ++row)
- {
- n_nonzero += (P.at(row,col) == val) ? uword(1) : uword(0);
- }
- }
- }
-
- return n_nonzero;
- }
- template<typename T1, typename T2>
- arma_warn_unused
- inline
- uword
- accu(const mtGlue<uword,T1,T2,glue_rel_noteq>& X)
- {
- arma_extra_debug_sigprint();
-
- const Proxy<T1> PA(X.A);
- const Proxy<T2> PB(X.B);
-
- arma_debug_assert_same_size(PA, PB, "operator!=");
-
- uword n_nonzero = 0;
-
- if( (Proxy<T1>::use_at == false) && (Proxy<T2>::use_at == false) )
- {
- typedef typename Proxy<T1>::ea_type PA_ea_type;
- typedef typename Proxy<T2>::ea_type PB_ea_type;
-
- PA_ea_type A = PA.get_ea();
- PB_ea_type B = PB.get_ea();
- const uword n_elem = PA.get_n_elem();
-
- for(uword i=0; i < n_elem; ++i)
- {
- n_nonzero += (A[i] != B[i]) ? uword(1) : uword(0);
- }
- }
- else
- {
- const uword PA_n_cols = PA.get_n_cols();
- const uword PA_n_rows = PA.get_n_rows();
-
- if(PA_n_rows == 1)
- {
- for(uword col=0; col < PA_n_cols; ++col)
- {
- n_nonzero += (PA.at(0,col) != PB.at(0,col)) ? uword(1) : uword(0);
- }
- }
- else
- {
- for(uword col=0; col < PA_n_cols; ++col)
- for(uword row=0; row < PA_n_rows; ++row)
- {
- n_nonzero += (PA.at(row,col) != PB.at(row,col)) ? uword(1) : uword(0);
- }
- }
- }
-
- return n_nonzero;
- }
- template<typename T1, typename T2>
- arma_warn_unused
- inline
- uword
- accu(const mtGlue<uword,T1,T2,glue_rel_eq>& X)
- {
- arma_extra_debug_sigprint();
-
- const Proxy<T1> PA(X.A);
- const Proxy<T2> PB(X.B);
-
- arma_debug_assert_same_size(PA, PB, "operator==");
-
- uword n_nonzero = 0;
-
- if( (Proxy<T1>::use_at == false) && (Proxy<T2>::use_at == false) )
- {
- typedef typename Proxy<T1>::ea_type PA_ea_type;
- typedef typename Proxy<T2>::ea_type PB_ea_type;
-
- PA_ea_type A = PA.get_ea();
- PB_ea_type B = PB.get_ea();
- const uword n_elem = PA.get_n_elem();
-
- for(uword i=0; i < n_elem; ++i)
- {
- n_nonzero += (A[i] == B[i]) ? uword(1) : uword(0);
- }
- }
- else
- {
- const uword PA_n_cols = PA.get_n_cols();
- const uword PA_n_rows = PA.get_n_rows();
-
- if(PA_n_rows == 1)
- {
- for(uword col=0; col < PA_n_cols; ++col)
- {
- n_nonzero += (PA.at(0,col) == PB.at(0,col)) ? uword(1) : uword(0);
- }
- }
- else
- {
- for(uword col=0; col < PA_n_cols; ++col)
- for(uword row=0; row < PA_n_rows; ++row)
- {
- n_nonzero += (PA.at(row,col) == PB.at(row,col)) ? uword(1) : uword(0);
- }
- }
- }
-
- return n_nonzero;
- }
- //! accumulate the elements of a subview (submatrix)
- template<typename eT>
- arma_warn_unused
- arma_hot
- inline
- eT
- accu(const subview<eT>& X)
- {
- arma_extra_debug_sigprint();
-
- const uword X_n_rows = X.n_rows;
- const uword X_n_cols = X.n_cols;
-
- eT val = eT(0);
-
- if(X_n_rows == 1)
- {
- typedef subview_row<eT> sv_type;
-
- const sv_type& sv = reinterpret_cast<const sv_type&>(X); // subview_row<eT> is a child class of subview<eT> and has no extra data
-
- const Proxy<sv_type> P(sv);
-
- val = accu_proxy_linear(P);
- }
- else
- if(X_n_cols == 1)
- {
- val = arrayops::accumulate( X.colptr(0), X_n_rows );
- }
- else
- {
- for(uword col=0; col < X_n_cols; ++col)
- {
- val += arrayops::accumulate( X.colptr(col), X_n_rows );
- }
- }
-
- return val;
- }
- template<typename eT>
- arma_warn_unused
- arma_hot
- inline
- eT
- accu(const subview_col<eT>& X)
- {
- arma_extra_debug_sigprint();
-
- return arrayops::accumulate( X.colptr(0), X.n_rows );
- }
- //
- template<typename T1>
- arma_hot
- inline
- typename T1::elem_type
- accu_cube_proxy_linear(const ProxyCube<T1>& P)
- {
- arma_extra_debug_sigprint();
-
- typedef typename T1::elem_type eT;
-
- eT val = eT(0);
-
- typename ProxyCube<T1>::ea_type Pea = P.get_ea();
-
- const uword n_elem = P.get_n_elem();
-
- if( arma_config::openmp && ProxyCube<T1>::use_mp && mp_gate<eT>::eval(n_elem) )
- {
- #if defined(ARMA_USE_OPENMP)
- {
- // NOTE: using parallelisation with manual reduction workaround to take into account complex numbers;
- // NOTE: OpenMP versions lower than 4.0 do not support user-defined reduction
-
- const int n_threads_max = mp_thread_limit::get();
- const uword n_threads_use = (std::min)(uword(podarray_prealloc_n_elem::val), uword(n_threads_max));
- const uword chunk_size = n_elem / n_threads_use;
-
- podarray<eT> partial_accs(n_threads_use);
-
- #pragma omp parallel for schedule(static) num_threads(int(n_threads_use))
- for(uword thread_id=0; thread_id < n_threads_use; ++thread_id)
- {
- const uword start = (thread_id+0) * chunk_size;
- const uword endp1 = (thread_id+1) * chunk_size;
-
- eT acc = eT(0);
- for(uword i=start; i < endp1; ++i) { acc += Pea[i]; }
-
- partial_accs[thread_id] = acc;
- }
-
- for(uword thread_id=0; thread_id < n_threads_use; ++thread_id) { val += partial_accs[thread_id]; }
-
- for(uword i=(n_threads_use*chunk_size); i < n_elem; ++i) { val += Pea[i]; }
- }
- #endif
- }
- else
- {
- #if defined(__FINITE_MATH_ONLY__) && (__FINITE_MATH_ONLY__ > 0)
- {
- if(P.is_aligned())
- {
- typename ProxyCube<T1>::aligned_ea_type Pea_aligned = P.get_aligned_ea();
-
- for(uword i=0; i<n_elem; ++i) { val += Pea_aligned.at_alt(i); }
- }
- else
- {
- for(uword i=0; i<n_elem; ++i) { val += Pea[i]; }
- }
- }
- #else
- {
- eT val1 = eT(0);
- eT val2 = eT(0);
-
- uword i,j;
- for(i=0, j=1; j<n_elem; i+=2, j+=2) { val1 += Pea[i]; val2 += Pea[j]; }
-
- if(i < n_elem) { val1 += Pea[i]; }
-
- val = val1 + val2;
- }
- #endif
- }
-
- return val;
- }
- template<typename T1>
- arma_hot
- inline
- typename T1::elem_type
- accu_cube_proxy_at_mp(const ProxyCube<T1>& P)
- {
- arma_extra_debug_sigprint();
-
- typedef typename T1::elem_type eT;
-
- eT val = eT(0);
-
- #if defined(ARMA_USE_OPENMP)
- {
- const uword n_rows = P.get_n_rows();
- const uword n_cols = P.get_n_cols();
- const uword n_slices = P.get_n_slices();
-
- podarray<eT> slice_accs(n_slices);
-
- const int n_threads = mp_thread_limit::get();
-
- #pragma omp parallel for schedule(static) num_threads(n_threads)
- for(uword slice = 0; slice < n_slices; ++slice)
- {
- eT val1 = eT(0);
- eT val2 = eT(0);
-
- for(uword col = 0; col < n_cols; ++col)
- {
- uword i,j;
- for(i=0, j=1; j<n_rows; i+=2, j+=2) { val1 += P.at(i,col,slice); val2 += P.at(j,col,slice); }
-
- if(i < n_rows) { val1 += P.at(i,col,slice); }
- }
-
- slice_accs[slice] = val1 + val2;
- }
-
- val = arrayops::accumulate(slice_accs.memptr(), slice_accs.n_elem);
- }
- #else
- {
- arma_ignore(P);
- }
- #endif
-
- return val;
- }
- template<typename T1>
- arma_hot
- inline
- typename T1::elem_type
- accu_cube_proxy_at(const ProxyCube<T1>& P)
- {
- arma_extra_debug_sigprint();
-
- typedef typename T1::elem_type eT;
-
- if(arma_config::openmp && ProxyCube<T1>::use_mp && mp_gate<eT>::eval(P.get_n_elem()))
- {
- return accu_cube_proxy_at_mp(P);
- }
-
- const uword n_rows = P.get_n_rows();
- const uword n_cols = P.get_n_cols();
- const uword n_slices = P.get_n_slices();
-
- eT val1 = eT(0);
- eT val2 = eT(0);
-
- for(uword slice = 0; slice < n_slices; ++slice)
- for(uword col = 0; col < n_cols; ++col )
- {
- uword i,j;
- for(i=0, j=1; j<n_rows; i+=2, j+=2) { val1 += P.at(i,col,slice); val2 += P.at(j,col,slice); }
-
- if(i < n_rows) { val1 += P.at(i,col,slice); }
- }
-
- return (val1 + val2);
- }
- //! accumulate the elements of a cube
- template<typename T1>
- arma_warn_unused
- arma_hot
- inline
- typename T1::elem_type
- accu(const BaseCube<typename T1::elem_type,T1>& X)
- {
- arma_extra_debug_sigprint();
-
- const ProxyCube<T1> P(X.get_ref());
-
- if(is_Cube<typename ProxyCube<T1>::stored_type>::value)
- {
- unwrap_cube<typename ProxyCube<T1>::stored_type> tmp(P.Q);
-
- return arrayops::accumulate(tmp.M.memptr(), tmp.M.n_elem);
- }
-
- return (ProxyCube<T1>::use_at) ? accu_cube_proxy_at(P) : accu_cube_proxy_linear(P);
- }
- //! explicit handling of multiply-and-accumulate (cube version)
- template<typename T1, typename T2>
- arma_warn_unused
- inline
- typename T1::elem_type
- accu(const eGlueCube<T1,T2,eglue_schur>& expr)
- {
- arma_extra_debug_sigprint();
-
- typedef eGlueCube<T1,T2,eglue_schur> expr_type;
-
- typedef typename ProxyCube<T1>::stored_type P1_stored_type;
- typedef typename ProxyCube<T2>::stored_type P2_stored_type;
-
- if(is_Cube<P1_stored_type>::value && is_Cube<P2_stored_type>::value)
- {
- const unwrap_cube<P1_stored_type> tmp1(expr.P1.Q);
- const unwrap_cube<P2_stored_type> tmp2(expr.P2.Q);
-
- return op_dot::direct_dot(tmp1.M.n_elem, tmp1.M.memptr(), tmp2.M.memptr());
- }
-
- const ProxyCube<expr_type> P(expr);
-
- return (ProxyCube<expr_type>::use_at) ? accu_cube_proxy_at(P) : accu_cube_proxy_linear(P);
- }
- //
- template<typename T>
- arma_warn_unused
- inline
- typename arma_scalar_only<T>::result
- accu(const T& x)
- {
- return x;
- }
- //! accumulate values in a sparse object
- template<typename T1>
- arma_warn_unused
- inline
- typename T1::elem_type
- accu(const SpBase<typename T1::elem_type,T1>& expr)
- {
- arma_extra_debug_sigprint();
-
- typedef typename T1::elem_type eT;
-
- const SpProxy<T1> P(expr.get_ref());
-
- if(SpProxy<T1>::use_iterator == false)
- {
- // direct counting
- return arrayops::accumulate(P.get_values(), P.get_n_nonzero());
- }
- else
- {
- typename SpProxy<T1>::const_iterator_type it = P.begin();
-
- const uword P_n_nz = P.get_n_nonzero();
-
- eT val = eT(0);
-
- for(uword i=0; i < P_n_nz; ++i) { val += (*it); ++it; }
-
- return val;
- }
- }
- //! explicit handling of accu(A + B), where A and B are sparse matrices
- template<typename T1, typename T2>
- arma_warn_unused
- inline
- typename T1::elem_type
- accu(const SpGlue<T1,T2,spglue_plus>& expr)
- {
- arma_extra_debug_sigprint();
-
- const unwrap_spmat<T1> UA(expr.A);
- const unwrap_spmat<T2> UB(expr.B);
-
- arma_debug_assert_same_size(UA.M.n_rows, UA.M.n_cols, UB.M.n_rows, UB.M.n_cols, "addition");
-
- return (accu(UA.M) + accu(UB.M));
- }
- //! explicit handling of accu(A - B), where A and B are sparse matrices
- template<typename T1, typename T2>
- arma_warn_unused
- inline
- typename T1::elem_type
- accu(const SpGlue<T1,T2,spglue_minus>& expr)
- {
- arma_extra_debug_sigprint();
-
- const unwrap_spmat<T1> UA(expr.A);
- const unwrap_spmat<T2> UB(expr.B);
-
- arma_debug_assert_same_size(UA.M.n_rows, UA.M.n_cols, UB.M.n_rows, UB.M.n_cols, "subtraction");
-
- return (accu(UA.M) - accu(UB.M));
- }
- //! explicit handling of accu(A % B), where A and B are sparse matrices
- template<typename T1, typename T2>
- arma_warn_unused
- inline
- typename T1::elem_type
- accu(const SpGlue<T1,T2,spglue_schur>& expr)
- {
- arma_extra_debug_sigprint();
-
- typedef typename T1::elem_type eT;
-
- const SpProxy<T1> px(expr.A);
- const SpProxy<T2> py(expr.B);
-
- typename SpProxy<T1>::const_iterator_type x_it = px.begin();
- typename SpProxy<T1>::const_iterator_type x_it_end = px.end();
-
- typename SpProxy<T2>::const_iterator_type y_it = py.begin();
- typename SpProxy<T2>::const_iterator_type y_it_end = py.end();
-
- eT acc = eT(0);
-
- while( (x_it != x_it_end) || (y_it != y_it_end) )
- {
- if(x_it == y_it)
- {
- acc += ((*x_it) * (*y_it));
-
- ++x_it;
- ++y_it;
- }
- else
- {
- const uword x_it_col = x_it.col();
- const uword x_it_row = x_it.row();
-
- const uword y_it_col = y_it.col();
- const uword y_it_row = y_it.row();
-
- if((x_it_col < y_it_col) || ((x_it_col == y_it_col) && (x_it_row < y_it_row))) // if y is closer to the end
- {
- ++x_it;
- }
- else // x is closer to the end
- {
- ++y_it;
- }
- }
- }
-
- return acc;
- }
- template<typename T1, typename spop_type>
- arma_warn_unused
- inline
- typename T1::elem_type
- accu(const SpOp<T1, spop_type>& expr)
- {
- arma_extra_debug_sigprint();
-
- typedef typename T1::elem_type eT;
-
- const bool is_vectorise = \
- (is_same_type<spop_type, spop_vectorise_row>::yes)
- || (is_same_type<spop_type, spop_vectorise_col>::yes)
- || (is_same_type<spop_type, spop_vectorise_all>::yes);
-
- if(is_vectorise)
- {
- return accu(expr.m);
- }
-
- const SpMat<eT> tmp = expr;
-
- return accu(tmp);
- }
- //! @}
|