/*! \file yof_imageuty.hpp \brief 画像処理ユーティリティ \author Kenta Yoshimura Copyright(C) 2003-2004 Kenta Yoshimura All Rights Reserved. $Id$ */ #ifndef YOF_IMAGEUTY #define YOF_IMAGEUTY #if !defined(_MSC_VER) | _MSC_VER > 1000 #pragma once #endif #include "yof_image.hpp" #include #include #include #include #if yof_X86 # include #endif // yof_X86 namespace yoffy { //! \warning require : src.alpha is zero. inline cmyk RGBtoCMYK( rgba src ) { cmyk ret; uint32_t k; /* k = src.red > src.green ? src.red : src.green; k = k > src.blue ? k : src.blue; ret.black = 255 - k; ret.cyan = 255 - src.blue - k; ret.magenta = 255 - src.green - k; ret.yellow = 255 - src.red - k; */ k = src.red > src.green ? src.red : src.green; k = k > src.blue ? k : src.blue; //k = k | (k << 8) | (k << 16) | (k << 24); k |= k << 8; k |= k << 16; ret.ref = 0xffffffff - src.ref - k; return ret; } // 作成中 (10922 で割ってるのがテキトー) inline rgba HSVtoRGB( hsv src ) { rgba ret; int i = src.hue / 10922; int f = src.hue % 10922; int p1 = src.value * (255 - src.saturation) / 255; int p2 = src.value * (255 - src.saturation * f / 10922) / 255; int p3 = src.value * (255 - src.saturation * (10922 - f) / 10922) / 255; switch ( i ) { case 0: ret.red = src.value; ret.green = p3; ret.blue = p1; break; case 1: ret.red = p2; ret.green = src.value; ret.blue = p1; break; case 2: ret.red = p1; ret.green = src.value; ret.blue = p3; break; case 3: ret.red = p1; ret.green = p2; ret.blue = src.value; break; case 4: ret.red = p3; ret.green = p1; ret.blue = src.value; break; case 5: ret.red = src.value; ret.green = p1; ret.blue = p2; break; } return ret; } /*! \warning 計算を簡略化するため Hue は計算途中または計算結果として 65535 の値をとる事が出来ます。 そのまま計算を続けると誤差が蓄積していくため、 通常は正確な計算を行うようになっていますが、 速度を優先する場合は YOF_FAST_COLOR_CONVERSION を定義してください。 */ inline hsv RGBtoHSV( rgba src ) { hsv ret; int max, min; int range; if ( src.red > src.green ) { max = src.red; min = src.green; } else { max = src.green; min = src.red; } max = max > src.blue ? max : src.blue; min = min < src.blue ? min : src.blue; range = max - min; ret.value = max; if ( range == 0 ) { ret.saturation = 0; ret.hue = 0; } else { ret.saturation = range * 255 / max; // 21845 = USHRT_MAX / 3; // 10922 = 21845 / 2; int tR = (max - src.red) * 10922 / range; int tG = (max - src.green) * 10922 / range; int tB = (max - src.blue) * 10922 / range; if ( src.red == max ) #ifdef YOF_FAST_COLOR_CONVERSION ret.hue = 0 + tB - tG; #else if ( tB < tG ) ret.hue = 65535 + tB - tG; else ret.hue = 0 + tB - tG; #endif else if ( src.green == max ) ret.hue = 21845 + tR - tB; else ret.hue = 43690 + tG - tR; } return ret; } inline hsv RGBtoHSV( const rgb & src ) { rgba v; v.ref = src.ref(); return RGBtoHSV( v ); } typedef yof_image_iterator( rgba ) imagergba_iterator; typedef yof_image_iterator( rgb ) imagergb_iterator; typedef yof_image_iterator( hsv ) imagehsv_iterator; typedef yof_const_image_iterator( rgba ) const_imagergba_iterator; typedef yof_const_image_iterator( rgb ) const_imagergb_iterator; typedef yof_const_image_iterator( hsv ) const_imagehsv_iterator; typedef yof_image( rgba ) imagergba; typedef yof_image( rgb ) imagergb; typedef yof_image( hsv ) imagehsv; typedef yof_clipimage_iterator( rgba ) clipimagergba_iterator; typedef yof_clipimage_iterator( rgb ) clipimagergb_iterator; typedef yof_clipimage_iterator( hsv ) clipimagehsv_iterator; typedef yof_const_clipimage_iterator( rgba ) const_clipimagergba_iterator; typedef yof_const_clipimage_iterator( rgb ) const_clipimagergb_iterator; typedef yof_const_clipimage_iterator( hsv ) const_clipimagehsv_iterator; typedef yof_clipimage( rgba ) clipimagergba; typedef yof_clipimage( rgb ) clipimagergb; typedef yof_clipimage( hsv ) clipimagehsv; #ifdef _WINGDI_ #ifdef _MSC_VER # pragma pack( push, __YOF_BITMAPFILE__, 1 ) #endif template< int BIT_DEPTH > struct BITMAPFILE { BITMAPFILEHEADER header; BITMAPINFOHEADER info; RGBQUAD colors[ 2 << (BIT_DEPTH - 1) ]; uint8_t pixels[ 1 ]; }; template<> struct BITMAPFILE< 16 > { BITMAPFILEHEADER header; BITMAPINFOHEADER info; uint16_t pixels[ 1 ]; }; template<> struct BITMAPFILE< 24 > { BITMAPFILEHEADER header; BITMAPINFOHEADER info; rgb pixels[ 1 ]; }; template<> struct BITMAPFILE< 32 > { BITMAPFILEHEADER header; BITMAPINFOHEADER info; rgba pixels[ 1 ]; }; #ifdef _MSC_VER # pragma pack( pop, __YOF_BITMAPFILE__ ) #endif #endif // _WINGDI_ inline yoffy::imagehsv BMPtoHSV( const yoffy::clipimageu16 & src ) { yoffy::imagehsv dst( src.width(), src.height(), false ); yoffy::imagehsv_iterator dst_itr = dst.begin(); yoffy::imagehsv_iterator tail = dst.end(); yoffy::clipimageu16::const_iterator src_itr = src.begin(); while ( dst_itr < tail ) { rgba pixSrc; pixSrc.red = (*src_itr & 0x7c00) >> 7; pixSrc.green = (*src_itr & 0x3e0) >> 2; pixSrc.blue = (*src_itr & 0x1f) << 3; src_itr++; *dst_itr++ = RGBtoHSV( pixSrc ); } return dst; } inline yoffy::imagehsv BMPtoHSV( const yoffy::clipimagergb & src ) { yoffy::imagehsv dst( src.width(), src.height(), false ); yoffy::imagehsv_iterator dst_itr = dst.begin(); yoffy::imagehsv_iterator tail = dst.end(); yoffy::clipimagergb::const_iterator src_itr = src.begin(); while ( dst_itr < tail ) { rgba pixSrc; pixSrc.ref = src_itr->ref(); src_itr++; *dst_itr++ = RGBtoHSV( pixSrc ); } return dst; } inline yoffy::imagehsv BMPtoHSV( const yoffy::imagergba & src ) { yoffy::imagehsv dst( src.width(), src.height(), false ); yoffy::imagehsv_iterator dst_itr = dst.begin(); yoffy::imagehsv_iterator tail = dst.end(); yoffy::imagergba::const_iterator src_itr = src.begin(); while ( dst_itr < tail ) { *dst_itr++ = RGBtoHSV( *src_itr++ ); } return dst; } #ifdef _WINGDI_ inline yoffy::imagehsv BMPtoHSV( const BITMAPINFOHEADER * lpSrcInfo ) { const uint8_t * lpSrc = reinterpret_cast< const uint8_t * >( lpSrcInfo ) + lpSrcInfo->biSize; const int nWidth8 = lpSrcInfo->biWidth + (-(lpSrcInfo->biWidth & 0x03) & 0x03); const int nWidth16 = lpSrcInfo->biWidth + (-(lpSrcInfo->biWidth & 0x01) & 0x01); switch ( lpSrcInfo->biBitCount ) { case 16: { yoffy::imageu16 imgRawSrc( (uint16_t *)lpSrc, nWidth16, lpSrcInfo->biHeight ); yoffy::clipimageu16 imgSrc = imgRawSrc.clip( 0, 0, lpSrcInfo->biWidth, lpSrcInfo->biHeight ); return BMPtoHSV( imgSrc ); } break; case 24: { yoffy::imagergb imgRawSrc( (rgb *)lpSrc, nWidth8, lpSrcInfo->biHeight ); yoffy::clipimagergb imgSrc = imgRawSrc.clip( 0, 0, lpSrcInfo->biWidth, lpSrcInfo->biHeight ); return BMPtoHSV( imgSrc ); } break; case 32: { yoffy::imagergba imgSrc( (rgba *)lpSrc, lpSrcInfo->biWidth, lpSrcInfo->biHeight ); return BMPtoHSV( imgSrc ); } break; } return yoffy::imagehsv(); } inline void HSVtoBMP( BITMAPINFOHEADER * lpDstInfo, const yoffy::imagehsv & src ) { rgba * dst_itr = reinterpret_cast< rgba * >( reinterpret_cast< uint8_t * >( lpDstInfo ) + lpDstInfo->biSize ); yoffy::imagehsv::const_iterator src_itr = src.begin(); yoffy::imagehsv::const_iterator tail = src.end(); while ( src_itr < tail ) { *dst_itr++ = HSVtoRGB( *src_itr++ ); } } #endif // _WINGDI_ inline yoffy::imageu16 GetHue( const yoffy::imagehsv & src ) { yoffy::imageu16 dst( src.width(), src.height(), false ); yoffy::imagehsv::const_iterator src_itr = src.begin(); yoffy::imagehsv::const_iterator tail = src.end(); yoffy::imageu16_iterator dst_itr = dst.begin(); while ( src_itr < tail ) { *dst_itr++ = (*src_itr++).hue; } return dst; } inline yoffy::imageu8 GetSaturation( const yoffy::imagehsv & src ) { yoffy::imageu8 dst( src.width(), src.height(), false ); yoffy::imagehsv::const_iterator src_itr = src.begin(); yoffy::imagehsv::const_iterator tail = src.end(); yoffy::imageu8_iterator dst_itr = dst.begin(); while ( src_itr < tail ) { *dst_itr++ = (*src_itr++).saturation; } return dst; } inline yoffy::imageu8 GetValue( const yoffy::imagehsv & src ) { yoffy::imageu8 dst( src.width(), src.height(), false ); yoffy::imagehsv::const_iterator src_itr = src.begin(); yoffy::imagehsv::const_iterator tail = src.end(); yoffy::imageu8_iterator dst_itr = dst.begin(); while ( src_itr < tail ) { *dst_itr++ = (*src_itr++).value; } return dst; } inline yoffy::imagehsv HSVImage( const yoffy::imageu16 & hue, const yoffy::imageu8 & satulation, const yoffy::imageu8 & value ) { assert( hue.width() == satulation.width() ); assert( hue.width() == value.width() ); assert( hue.height() == satulation.height() ); assert( hue.height() == value.height() ); yoffy::imagehsv dst( hue.width(), hue.height(), false ); yoffy::imagehsv_iterator dst_itr = dst.begin(); yoffy::imagehsv_iterator tail = dst.end(); yoffy::imageu16::const_iterator hue_itr = hue.begin(); yoffy::imageu8::const_iterator sat_itr = satulation.begin(); yoffy::imageu8::const_iterator val_itr = value.begin(); while ( dst_itr < tail ) { dst_itr->hue = *hue_itr++; dst_itr->saturation = *sat_itr++; dst_itr->value = *val_itr++; dst_itr++; } return dst; } inline yoffy::imagergb RGBImage( const yoffy::imageu8 & red, const yoffy::imageu8 & green, const yoffy::imageu8 & blue ) { assert( red.width() == green.width() ); assert( red.width() == blue.width() ); assert( red.height() == green.height() ); assert( red.height() == blue.height() ); yoffy::imagergb dst( red.width(), red.height(), false ); yoffy::imagergb_iterator dst_itr = dst.begin(); yoffy::imagergb_iterator tail = dst.end(); yoffy::imageu8::const_iterator r_itr = red.begin(); yoffy::imageu8::const_iterator g_itr = green.begin(); yoffy::imageu8::const_iterator b_itr = blue.begin(); while ( dst_itr < tail ) { *dst_itr++ = rgb( *r_itr++, *g_itr++, *b_itr++ ); } return dst; } inline yoffy::imagehsv HSVImage( const yoffy::imagergb & imgRGB ) { yoffy::imagehsv dst( imgRGB.width(), imgRGB.height(), false ); yoffy::imagehsv_iterator dst_itr = dst.begin(); yoffy::imagehsv_iterator tail = dst.end(); yoffy::imagergb::const_iterator src_itr = imgRGB.begin(); while ( dst_itr < tail ) { *dst_itr++ = RGBtoHSV( *src_itr++ ); } return dst; } inline yoffy::imagergb RGBImage( const yoffy::imagehsv & imgHSV ) { yoffy::imagergb dst( imgHSV.width(), imgHSV.height(), false ); yoffy::imagergb_iterator dst_itr = dst.begin(); yoffy::imagergb_iterator tail = dst.end(); yoffy::imagehsv::const_iterator src_itr = imgHSV.begin(); while ( dst_itr < tail ) { rgba v = HSVtoRGB( *src_itr++ ); *dst_itr++ = rgb( v.ref ); } return dst; } /*! \brief maximum filter \param width real width = width * 2 + 1 一辺が width * 2 + 1 のサイズからなる矩形の中で最小の値を取り出します。 */ template< class IMG > image< basic_image< yof_typename IMG::variable_iterator > > minimum_filter( const IMG & src, const typename IMG::size_type width = 1 ) { using std::vector; typedef typename IMG::value_type value_type; typedef typename IMG::size_type size_type; typedef vector vector_type; typedef typename IMG::const_iterator src_itr; typedef typename int_t::inclusion inclu_t; typedef typename int_t::signed_t sinclu_t; assert( 0 < width ); const size_type bx = src.width(); const size_type by = src.height(); image< basic_image< yof_typename IMG::variable_iterator > > dst( bx, by, false ); const size_type mtxw = width * 2 + 1; vector_type cols; minimum< value_type > filter; const clamp wbound( 0, bx - 1 ); const clamp hbound( 0, by - 1 ); const size_type srcwidth = src.width(); cols.reserve( mtxw ); for ( size_type y = 0; y < by; y++ ) { cols.clear(); for ( size_type i = 0, ib = wbound( width ); i < ib; i++ ) { value_type min_val = src( i, hbound( y - width ) ); size_type js = hbound( y - width ); src_itr itr = src.at( i, js ); for ( size_type j = js, jb = hbound( y + width ); j <= jb; j++ ) { min_val = filter( min_val, *itr ); itr += srcwidth; } cols.push_back( min_val ); } for ( size_type x = 0; x < bx; x++ ) { size_type i = x + width; if ( i < bx ) { // minimum of right of matrix a vertical line value_type min_val = src( i, hbound( y - width ) ); size_type js = hbound( y - width ); src_itr itr = src.at( i, js ); for ( size_type j = js, jb = hbound( y + width ); j <= jb; j++ ) { min_val = filter( min_val, *itr ); itr += srcwidth; } cols.push_back( min_val ); } dst( x, y ) = *std::min_element( cols.begin(), cols.end() ); if ( x >= width ) cols.erase( cols.begin() ); } } return dst; } /*! \brief maximum filter \param width real width = width * 2 + 1 一辺が width * 2 + 1 のサイズからなる矩形の中で最大の値を取り出します。 */ template< class IMG > image< basic_image< yof_typename IMG::variable_iterator > > maximum_filter( const IMG & src, const typename IMG::size_type width = 1 ) { using std::vector; typedef typename IMG::value_type value_type; typedef typename IMG::size_type size_type; typedef vector vector_type; typedef typename IMG::const_iterator src_itr; typedef typename int_t::inclusion inclu_t; typedef typename int_t::signed_t sinclu_t; assert( 0 < width ); const size_type bx = src.width(); const size_type by = src.height(); image< basic_image< yof_typename IMG::variable_iterator > > dst( bx, by, false ); const size_type mtxw = width * 2 + 1; vector_type cols; maximum< value_type > filter; const clamp wbound( 0, bx - 1 ); const clamp hbound( 0, by - 1 ); const size_type srcwidth = src.width(); cols.reserve( mtxw ); for ( size_type y = 0; y < by; y++ ) { cols.clear(); for ( size_type i = 0; i < wbound( width ); i++ ) { value_type max_val = src( i, hbound( y - width ) ); size_type js = hbound( y - width ); src_itr itr = src.at( i, js ); for ( size_type j = js; j <= hbound( y + width ); j++ ) { max_val = filter( max_val, *itr ); itr += srcwidth; } cols.push_back( max_val ); } for ( size_type x = 0; x < bx; x++ ) { size_type i = x + width; if ( i < bx ) { // maximum of right of matrix a vertical line value_type max_val = src( i, hbound( y - width ) ); size_type js = hbound( y - width ); src_itr itr = src.at( i, js ); for ( size_type j = js; j <= hbound( y + width ); j++ ) { max_val = filter( max_val, *itr ); itr += srcwidth; } cols.push_back( max_val ); } dst( x, y ) = *std::max_element( cols.begin(), cols.end() ); if ( x >= width ) cols.erase( cols.begin() ); } } return dst; } /*! \brief 3x3 median filter median_filter よりも高速なアルゴリズムを使用しますが、端を考慮しません。 \sa median_filter, coarsely_median_filter */ template< class IMG > image< basic_image< yof_typename IMG::variable_iterator > > median_filter3x3( const IMG & src ) { using std::vector; typedef typename IMG::value_type value_type; typedef typename IMG::const_iterator src_itr; const int bx = src.width(); const int by = src.height(); image< basic_image< yof_typename IMG::variable_iterator > > dst( bx, by, false ); const int width = 1; const int mtxw = width * 2 + 1; const int mtxmedian = (mtxw * mtxw) >> 1; value_type *cols[ mtxw ], *cols2[ mtxw ]; const int srcwidth = src.width(); bool debf=false; for ( int i = mtxw - 1; i >= 0; i-- ) cols[ i ] = new value_type[ mtxw ]; for ( int y = 0; y < by; y++ ) { if ( y < width || by <= y + width ) { for ( int x = 0; x < bx; x++ ) dst( x, y ) = src( x, y ); } else { for ( int i = 1; i < mtxw; i++ ) { for ( int j = 0; j < mtxw; j++ ) cols[ i ][ j ] = src( i - 1, j + y - width ); std::sort( cols[ i ], cols[ i ] + mtxw ); } for ( int x = 0; x < bx; x++ ) { if ( x < width || bx <= x + width ) { dst( x, y ) = src( x, y ); } else { // read a vertical line and sort { src_itr itr = src.at( x + width, y - width ); value_type * const cp = cols[ 0 ]; cp[ 0 ] = itr[ 0 ]; cp[ 1 ] = itr[ srcwidth ]; cp[ 2 ] = itr[ srcwidth * 2 ]; std::sort( cols[ 0 ], cols[ 0 ] + mtxw ); } // rotate cols { value_type * const last = cols[ 0 ]; cols[ 0 ] = cols[ 1 ]; cols2[ 0 ] = cols[ 1 ]; cols[ 1 ] = cols[ 2 ]; cols2[ 1 ] = cols[ 2 ]; cols[ 2 ] = last; cols2[ 2 ] = last; } // holizontal sort { value_type * p; if ( cols2[ 0 ][ 1 ] > cols2[ 1 ][ 1 ] ) { p = cols2[ 0 ]; cols2[ 0 ] = cols2[ 1 ]; cols2[ 1 ] = p; } if ( cols2[ 1 ][ 1 ] > cols2[ 2 ][ 1 ] ) { p = cols2[ 1 ]; cols2[ 1 ] = cols2[ 2 ]; cols2[ 2 ] = p; } if ( cols2[ 0 ][ 1 ] > cols2[ 1 ][ 1 ] ) { p = cols2[ 0 ]; cols2[ 0 ] = cols2[ 1 ]; cols2[ 1 ] = p; } } // find median { value_type v[] = { cols2[ 0 ][ 2 ], cols2[ 1 ][ 1 ], cols2[ 2 ][ 0 ] }; if ( v[ 0 ] < v[ 1 ] && v[ 1 ] > v[ 2 ] ) { v[ 1 ] = cols2[ 1 ][ 0 ]; dst( x, y ) = *std::max_element( v, v + mtxw ); } else if ( v[ 0 ] > v[ 1 ] && v[ 1 ] < v[ 2 ] ) { v[ 1 ] = cols2[ 1 ][ 2 ]; dst( x, y ) = *std::min_element( v, v + mtxw ); } else { dst( x, y ) = v[ 1 ]; } } } } } } for ( int i = mtxw - 1; i >= 0; i-- ) { delete[] cols[ i ]; } return dst; } /*! \brief median filter \param width real width = width * 2 + 1 \param range The range it is considered that is median. real range = range * 2 + 1 一辺が width * 2 + 1 のサイズからなる矩形の中の中央値を取り出します。
真の中央値を得る事よりも速度を優先する場合、median_filter3x3, coarsely_median_filter の 使用を検討してください。 \sa median_filter3x3, coarsely_median_filter */ template< class IMG > image< basic_image< yof_typename IMG::variable_iterator > > median_filter( const IMG & src, const int width = 1, const int range = 0 ) { typedef typename IMG::value_type value_type; //typedef typename IMG::size_type size_type; typedef int size_type; typedef typename IMG::const_iterator src_itr; typedef typename int_t::inclusion inclu_t; typedef typename int_t::signed_t sinclu_t; assert( 0 < width ); assert( 0 <= range ); const size_type bx = src.width(); const size_type by = src.height(); image< basic_image< yof_typename IMG::variable_iterator > > dst( bx, by, false ); const size_type pixels = (width * 2 + 1) * (width * 2 + 1); std::vector< value_type > matrix; const clamp wbound( 0, bx - 1 ); const clamp hbound( 0, by - 1 ); matrix.reserve( pixels ); for ( size_type y = 0; y < by; y++ ) { for ( size_type x = 0; x < bx; x++ ) { matrix.clear(); const int bi = wbound( x + width ); const int bj = hbound( y + width ); for ( int j = hbound( y - width ); j <= bj; j++ ) for ( int i = wbound( x - width ); i <= bi; i++ ) matrix.push_back( src( i, j ) ); std::sort( matrix.begin(), matrix.end() ); const int mtxsize = size_type(matrix.size()); const int rated_range = range * mtxsize / pixels; const int mtxhalf = (mtxsize >> 1) - (~mtxsize & 1); const int clamped_range = rated_range <= mtxhalf ? rated_range : mtxhalf; const int top = mtxhalf - clamped_range; const int bottom = mtxhalf + clamped_range; const value_type value = src( x, y ); if ( matrix[ top ] <= value && value <= matrix[ bottom ] ) dst( x, y ) = value; else dst( x, y ) = matrix[ mtxhalf ]; } } return dst; } /*! \brief wraparound value median filter \param width real width = width * 2 + 1 \param range The range it is considered that is median. real range = range * 2 + 1 \warning 値は型の取りうる最小値と最大値に規格化されている必要があります。 一辺が width * 2 + 1 のサイズからなる矩形の中の中央値を取り出します。
この関数は、型の取りうる最小値と最大値がラップアラウンドする場合に 使用します。
角度のように最大値に達すると最小値に戻るような場合に有効ですが、 型の範囲に規格化されている必要があるため、 radian でも degree でもいけません。例えば型が unsigned int であれば、 0 から UINT_MAX の範囲に規格化してください。 */ template< class IMG > image< basic_image< yof_typename IMG::variable_iterator > > wrapvalue_median_filter( const IMG & src, const int width = 1, const int range = 0 ) { typedef typename IMG::value_type value_type; assert( 0 < width ); assert( 0 <= range ); const int bx = src.width(); const int by = src.height(); image< basic_image< yof_typename IMG::variable_iterator > > dst( bx, by, false ); const int pixels = (width * 2 + 1) * (width * 2 + 1); std::vector< value_type > matrix; const clamp< int > wbound( 0, bx - 1 ); const clamp< int > hbound( 0, by - 1 ); matrix.reserve( pixels ); for ( int y = 0; y < by; y++ ) { for ( int x = 0; x < bx; x++ ) { matrix.clear(); const int bi = wbound( x + width ); const int bj = hbound( y + width ); for ( int j = hbound( y - width ); j <= bj; j++ ) for ( int i = wbound( x - width ); i <= bi; i++ ) matrix.push_back( src( i, j ) ); std::sort( matrix.begin(), matrix.end() ); const int mtxsize = matrix.size(); value_type rotate = 1 << (sizeof( value_type ) * 8 - 1); value_type max_diff = (matrix[ 0 ] + rotate) - (matrix[ mtxsize - 1 ] + rotate); int max_diff_index = 0; for ( int i = matrix.size() - 2; i >= 0; i-- ) { const value_type diff = matrix[ i + 1 ] - matrix[ i ]; if ( max_diff < diff ) { max_diff = diff; max_diff_index = i + 1; } } const int rated_range = range * mtxsize / pixels; const int mtxhalf = (mtxsize >> 1) - (~mtxsize & 1); const int clamped_range = rated_range <= mtxhalf ? rated_range : mtxhalf; const int top = (max_diff_index + mtxhalf - clamped_range) % mtxsize; const int bottom = (max_diff_index + mtxhalf + clamped_range) % mtxsize; const int median = (max_diff_index + mtxhalf) % mtxsize; const value_type value = src( x, y ); if ( top <= bottom ) { if ( matrix[ top ] <= value && value <= matrix[ bottom ] ) dst( x, y ) = value; else dst( x, y ) = matrix[ median ]; } else { if ( matrix[ bottom ] < value && value < matrix[ top ] ) dst( x, y ) = matrix[ median ]; else dst( x, y ) = value; } } } return dst; } /*! \brief moving average \param width real width = width * 2 + 1 */ template< class IMG > image< basic_image< yof_typename IMG::variable_iterator > > moving_average( const IMG & src, const int width = 1 ) { typedef typename IMG::value_type value_type; typedef typename IMG::const_iterator src_iterator; typedef typename IMG::variable_iterator dst_iterator; typedef typename int_t< value_type >::inclusion tmp_inclusion; typedef typename int_t< tmp_inclusion >::gen value_inclusion; typedef std::vector< value_inclusion > line_type; typedef typename line_type::iterator line_iterator; typedef image< basic_image< yof_typename IMG::variable_iterator > > dst_image; assert( 0 < width ); const int bx = src.width(); const int by = src.height(); dst_image dst( bx, by, false ); const int pixels = (width * 2 + 1) * (width * 2 + 1); line_type line( src.width() ); const clamp wbound( 0, bx - 1 ); const clamp hbound( 0, by - 1 ); dst_iterator dst_itr = dst.begin(); // sum src's vertical to line { src_iterator src_itr = src.begin(); const int ty = hbound(width) + (by <= width); for ( int y = 0; y < ty; y++ ) { src_iterator tail = src.at( 0, y + 1 ); line_iterator line_itr = line.begin(); while ( src_itr < tail ) { *line_itr++ += *src_itr++; } } } for ( int y = 0; y < by; y++ ) { if ( y > width ) { // remove top of matrix line src_iterator src_itr = src.at( 0, y - width - 1 ); src_iterator tail = src.at( 0, y - width ); line_iterator line_itr = line.begin(); while ( src_itr < tail ) { *line_itr++ -= *src_itr++; } } if ( y + width < by ) { // add bottom of matrix line src_iterator src_itr = src.at( 0, y + width ); src_iterator tail = src.at( 0, y + width + 1 ); line_iterator line_itr = line.begin(); while ( src_itr < tail ) { *line_itr++ += *src_itr++; } } { line_iterator line_itr = line.begin(); int mtxh = hbound( y + width ) - hbound( y - width ) + 1; value_inclusion value = 0; const int ti = wbound(width) + (bx <= width); for ( int i = 0; i < ti; i++ ) value += line_itr[ i ]; for ( int x = 0; x < bx; x++ ) { int mtxw = wbound( x + width ) - wbound( x - width ) + 1; if ( x > width ) value -= line_itr[ -width - 1 ]; if ( x + width < bx ) value += line_itr[ width ]; line_itr++; *dst_itr++ = value / (mtxw * mtxh); } } } return dst; } /*! \brief Gaussian Blur \param src 入力画像 兼 出力画像 \param width real width = width * 4 + 1 通常、入力画像 s、出力画像 d と gauss 関数 g があった場合 for ( i = 0; i < s.width; i++ ) d[i] = s[i-1]*g[1] + s[i]*g[0] + s[i+1]*g[1]; と s と d を中心にして考えるが、この関数は 手っ取り早く並列化するために g を中心にして考える。 */ template< class IMG > //image< basic_image< yof_typename IMG::variable_iterator > > void gaussian_blur( //const IMG & src, IMG & src, const int width = 1 ) { using std::vector; typedef typename IMG::value_type value_type; typedef image< basic_image< yof_typename IMG::variable_iterator > > dst_image; const int nRange = width * 4 + 1; const int bx = src.width(), by = src.height(); vector gauss(nRange); vector gaussSum(nRange); vector line(yof_max( bx, by )); dst_image dst( bx, by, false ); { const double d = 1.0 / (2 * width * width); double dSum = 0; // ガウス関数をテーブル化する(合計も求めておく) for ( int i = 0; i < nRange; i++ ) { gauss[i] = exp( -i * i * d ); // 中心(gauss[0])から左右に伸びているので 2 倍する dSum += gauss[i] * 2; } // 合計が 1 になるように規格化 // 中心は 1 点なので 2 倍した分を引いておく dSum = 1.0 / (dSum - gauss[0]); for ( int i = 0; i < nRange; i++ ) gauss[i] *= dSum; // 画像の端は width より点数が少なくなるので // 平均するべき値をテーブル化しておく dSum = 1.0; for ( int i = nRange - 1; 0 <= i; i-- ) { gaussSum[i] = 1.0 / dSum; dSum -= gauss[i]; } } // 水平方向の処理 for ( int y = 0; y < by; y++ ) { // gauss[0]は中央しかないのでここでかける // ループで汚れた line 配列の初期化にもなっている { const double d = gauss[0]; for ( int x = 0; x < bx; x++ ) line[x] = src(x, y) * d; } // gauss[1]以降は左右とも同じ値が使えるので 2 pixel ずつかける for ( int i = 1; i < nRange; i++ ) { const double d = gauss[i]; for ( int x = i; x < bx - i; x++ ) { double v = src(x, y) * d; line[x - i] += v; line[x + i] += v; } // 画像左端、右端の処理(ピクセル数が width 未満になるので) for ( int x = 0; x < yof_min( i, bx ); x++ ) line[x + i] += src(x, y) * d; for ( int x = bx - i; x < bx; x++ ) line[x - i] += src(x, y) * d; } // 左端、右端の平均 for ( int x = 0; x < yof_min( nRange, bx ); x++ ) { const int i = bx - x - 1; const double d = gaussSum[x]; dst(x, y) = line[x] * d; dst(i, y) = line[i] * d; } // 中央 for ( int x = nRange; x < bx - nRange; x++ ) dst(x, y) = line[x]; } // 垂直方向の処理(上と同じ) for ( int x = 0; x < bx; x++ ) { { const double d = gauss[0]; for ( int y = 0; y < by; y++ ) line[y] = dst(x, y) * d; } for ( int i = 1; i < nRange; i++ ) { const double d = gauss[i]; for ( int y = i; y < by - i; y++ ) { double v = dst(x, y) * d; line[y - i] += v; line[y + i] += v; } for ( int y = 0; y < yof_min( i, by ); y++ ) line[y + i] += dst(x, y) * d; for ( int y = by - i; y < by; y++ ) line[y - i] += dst(x, y) * d; } for ( int y = 0; y < yof_min( nRange, by ); y++ ) { const int i = by - y - 1; const double d = gaussSum[y]; src(x, y) = line[y] * d; src(x, i) = line[i] * d; } // 中央 for ( int y = nRange; y < by - nRange; y++ ) src(x, y) = line[y]; } //return dst; } /*! \brief 線上に演算を適用する \param op IMG::value_type operator(IMG::value_type pixel, IMG::value_type value) 関数を持つファンクタ ファンクタ op を変更することで、ピクセルと value を元に演算した値を設定することが出来ます。 */ template void line( IMG & src, int x1, int y1, int x2, int y2, yof_typename IMG::value_type value, OP & op ) { typedef typename IMG::size_type size_type; typedef double reg_t; // 余っている符号付きレジスタの型(int でも double でもいい) reg_t dx = abs( x2 - x1 ); reg_t dy = abs( y2 - y1 ); int sx = (x2 < x1) ? -1 : 1; int sy = (y2 < y1) ? -1 : 1; if ( dy <= dx ) { reg_t e = dx - (y2 < y1); dx *= 2; dy *= 2; for ( ; x1 != x2; x1 += sx ) { if ( e >= dx ) { e -= dx; y1 += sy; } src( size_type(x1), size_type(y1) ) = op( src( size_type(x1), size_type(y1) ), value ); e += dy; } } else { reg_t e = dy - (x2 < x1); dx *= 2; dy *= 2; for ( ; y1 != y2; y1 += sy ) { if ( e >= dy ) { e -= dy; x1 += sx; } src( size_type(x1), size_type(y1) ) = op( src( size_type(x1), size_type(y1) ), value ); e += dx; } } src( size_type(x2), size_type(y2) ) = op( src( size_type(x2), size_type(y2) ), value ); } //! draw line template void line( IMG & src, int x1, int y1, int x2, int y2, yof_typename IMG::value_type value ) { return line( src, x1, y1, x2, y2, value, takeright() ); } #if yof_X86 namespace x86 { struct cpuid_t { unsigned a, b, c, d; cpuid_t() : a(0), b(0), c(0), d(0) {} inline cpuid_t( unsigned mode ) { cpuid( mode ); } inline void cpuid( unsigned mode ) { yof_asm_using_this(); yof_asm { mov eax, mode mov esi, yof_asm_this cpuid mov dword ptr [esi ], eax mov dword ptr [esi + 4], ebx mov dword ptr [esi + 8], ecx mov dword ptr [esi + 12], edx } } }; } #endif //! scalar_madd_simd が使用可能かどうか template inline int is_scalar_madd_simd_enabled( const T v ) { return 0; } template inline void scalar_madd_simd( const T * first, const T * last, const T value, T * dest ) { assert( false ); } #if yof_X86 //! scalar_madd_simd が使用可能かどうか template<> inline int is_scalar_madd_simd_enabled( const float v ) { x86::cpuid_t id( 0x01 ); return id.d & (1 << 25); } /*! \brief SSE を使って first から last までの値に value を掛けて dest に足します \param first 配列の先頭の要素を指したポインタ \param last 配列の最後の要素の次を指したポインタ \param value 掛ける値 \param dest 足す値および、結果 次の演算を行います: \code while ( first < last ) *dest++ += *first++ * value; \endcode */ template<> inline void scalar_madd_simd( const float * first, const float * last, const float value, float * dest ) { assert( first < last ); /* // C++ algorithm const __m128 v = _mm_set_ps1( value ); if ( reinterpret_cast(first) & 0xf == reinterpret_cast(dest) & 0xf ) { const char * pre = reinterpret_cast(first) + 512; const __m128 * l = reinterpret_cast(reinterpret_cast(last) & -16) - 1; while ( first < last && ((int)(first) & 0xf) ) { _mm_prefetch( pre, _MM_HINT_T1 ); pre += 32; *dest++ += value * *first++; } const __m128 * f = reinterpret_cast(first); __m128 * d = reinterpret_cast<__m128 *>(dest); while ( f < l ) { _mm_prefetch( pre, _MM_HINT_T1 ); pre += 32; d[0] = _mm_add_ps( d[0], _mm_mul_ps( f[0], v ) ); d[1] = _mm_add_ps( d[1], _mm_mul_ps( f[1], v ) ); d += 2; f += 2; } first = reinterpret_cast(f); dest = reinterpret_cast(d); } while ( first < last ) *dest++ += value * *first++; */ // Assembly algorithm yof_asm { mov eax, last ; eax = last mov esi, first ; esi = first and eax, -16 ; eax 16 byte (xmm register size) align mov edi, dest ; edi = dest movss xmm0, dword ptr[value] ; xmm0[0] = value prefetcht0 byte ptr [esi] lea ebx, xmmword ptr [esi + 512] ; pre = first + 512 mov ecx, esi mov edx, edi sub eax, 32 ; margin for unroll and ecx, 0fh and edx, 0fh cmp ecx, edx prefetcht1 byte ptr [ebx - 64] shufps xmm0, xmm0, 0 ; xmm0[:] = xmm0[0] jne Llast4_init ; (first & 0xf) != (dest & 0xf) ? Llast4_init cmp esi, eax jae Llast4_init ; esi >= eax ? Llast4_init ; esi on tail test esi, 0fh prefetcht1 byte ptr [ebx - 32] jz Lmain_init ; esi & 15 == 0 ? Lmain_init ; aligned align 16 Lalign: movaps xmm1, xmm0 ; xmm1 = value prefetcht1 byte ptr [ebx] mulss xmm1, dword ptr [esi] ; xmm1 *= *first add esi, 4 ; first++ addss xmm1, dword ptr [edi] ; xmm1 += *dest add ebx, 32 ; pre += 32 movss dword ptr [edi], xmm1 ; *dest = xmm1 cmp esi, eax lea edi, dword ptr [edi + 4] ; dest++ jae Llast1_init ; first >= last ? Llast1_init ; esi on tail test esi, 0fh jnz Lalign ; first & 15 != 0 ? Lalign ; no aligned align 16 Lmain_init: movaps xmm1, xmm0 ; xmm1 = value mulps xmm1, xmmword ptr [esi] ; xmm1 *= first[0] align 16 Lmain: movaps xmm2, xmm0 ; xmm2 = value addps xmm1, xmmword ptr [edi] ; xmm1 += dest[0] mulps xmm2, xmmword ptr [esi + 16] ; xmm2 *= first[1] add esi, 32 ; first += 2 movaps xmmword ptr [edi], xmm1 ; dest[0] = xmm1 add edi, 32 ; dest += 2 movaps xmm1, xmm0 ; xmm1 = value prefetcht1 byte ptr [ebx] ; prefetch pre addps xmm2, xmmword ptr [edi - 16] ; xmm2 += dest[-1] add ebx, 32 ; pre += 2 cmp esi, eax mulps xmm1, xmmword ptr [esi] ; xmm1 *= first[0] movaps xmmword ptr [edi - 16], xmm2 ; dest[-1] = xmm2 jb Lmain ; esi < eax ? Lmain align 16 ; last data or could not aligned data Llast4_init: cmp esi, eax jae Llast1_init align 16 Llast4: prefetcht2 byte ptr [ebx] ; prefetch pre movups xmm1, xmmword ptr [esi] ; xmm1 = first[0] movups xmm2, xmmword ptr [edi] ; xmm2 = dest[0] mulps xmm1, xmm0 ; xmm1 *= value addps xmm1, xmm2 ; xmm1 += xmm2 movups xmmword ptr [edi], xmm1 ; dest[0] = xmm1 add ebx, 32 ; pre += 32 movups xmm1, xmmword ptr [esi + 16] ; xmm1 = first[1] movups xmm2, xmmword ptr [edi + 16] ; xmm2 = dest[1] mulps xmm1, xmm0 ; xmm1 *= value addps xmm1, xmm2 ; xmm1 += xmm2 movups xmmword ptr [edi + 16], xmm1 ; dest[0] = xmm1 add esi, 32 ; dest += 2 add edi, 32 ; first += 2 cmp esi, eax jb Llast4 ; first < (last - 32) ? Llast4 align 16 Llast1_init: mov eax, last cmp esi, eax jae Llast1_end align 16 Llast1: movaps xmm1, xmm0 ; xmm1 = value mulss xmm1, dword ptr [esi] ; xmm1 *= first[0] add esi, 4 ; first++ addss xmm1, dword ptr [edi] ; xmm1 += last[0] movss dword ptr [edi], xmm1 ; last[0] = xmm1 add edi, 4 ; edi++ cmp esi, eax jb Llast1 ; esi < eax ? Llast1 Llast1_end: } } #endif /*! \brief first から last までの値に value を掛けて dest に足します \param first 配列の先頭の要素を指したポインタ \param last 配列の最後の要素の次を指したポインタ \param value 掛ける値 \param dest 足す値および、結果 次の演算を行います: \code while ( first < last ) *dest++ += *first++ * value; \endcode SIMD が使用できる場合には SIMD を使って演算を行います。\n 今のところ x86 IA-32 の SSE (float) のみ対応。 */ template inline void scalar_madd( const T * first, const T * last, const T value, T * dest ) { if ( is_scalar_madd_simd_enabled( T() ) ) scalar_madd_simd( first, last, value, dest ); else while ( first < last ) *dest++ += *first++ * value; } /*! \brief 疎行列の積 \param src1 疎行列 \param src2 行列 \warning 1 行のメモリが増加方向に連続していない型にこの関数を使用しないでください。 基本的な yoffy::image および yoffy::clip_image は問題ありません。 アドレスが連続していないイテレータや、reverse_iterator を使った型では予期しないアドレスにアクセスします。 src1 に 0 を多く含む場合に演算量を減らす事が出来ます。\n src1 は疎行列でなくても構いません。 src1 と src2 が両方とも float の場合には SSE が使用されます。\n 速度が欲しい場合には yoffy::imagef32 の様な float の使用を検討してください。 src2 だけが疎行列の場合は sparse_matrix_prod2 の使用を検討してください。 */ template IMAGE1 sparse_matrix_prod( const IMAGE1 & src1, const IMAGE2 & src2 ) { typedef typename IMAGE1::size_type size_type; typedef typename IMAGE1::value_type value_type; const int has_simd = is_scalar_madd_simd_enabled( value_type() ); const size_type w = src2.width(), h = src1.height(); const size_type w1 = src1.width(), h1 = src1.height(); IMAGE1 dst(w, h); // 毎ループ scalar_madd を使うと is_scalar_madd_simd_enabled を実行することになるので // ここで scalar_madd と scalar_madd_simd を分けてしまう if ( has_simd ) { for ( size_type y = 0; y < h1; y++ ) for ( size_type x = 0; x < w1; x++ ) { const value_type v = src1(x, y); if ( v ) { //for ( size_type i = 0; i < w; i++ ) // dst(i, y) += v * src2(i, x); scalar_madd_simd( src2.at(0, x).get(), src2.at(w, x).get(), v, dst.at(0, y).get() ); } } } else { for ( size_type y = 0; y < h1; y++ ) for ( size_type x = 0; x < w1; x++ ) { const value_type v = src1(x, y); if ( v ) { //for ( size_type i = 0; i < w; i++ ) // dst(i, y) += v * src2(i, x); scalar_madd( src2.at(0, x).get(), src2.at(w, x).get(), v, dst.at(0, y).get() ); } } } return dst; } /*! \brief 疎行列の積 \param transed_src1 転置済み行列 \param src2 疎行列 \return 転置済み行列積 \warning 1 行のメモリが増加方向に連続していない型にこの関数を使用しないでください。 基本的な yoffy::image および yoffy::clip_image は問題ありません。 アドレスが連続していないイテレータや、reverse_iterator を使った型では予期しないアドレスにアクセスします。 src2 に 0 を多く含む場合に演算量を減らす事が出来ます。\n src2 は疎行列でなくても構いません。 src1 と src2 が両方とも float の場合には SSE が使用されます。\n 速度が欲しい場合には yoffy::imagef32 の様な float の使用を検討してください。 src1 が疎行列の場合や src2 が疎行列で無い場合は sparse_matrix_prod を使用してください。 src1 は効率化のため転置されている必要があります。\n また、戻り値も転置されているため、利用者が元に戻す必要があります。 利用者が転置を 2 回行って疎行列を利用した高速化を維持するよりも、 疎行列による高速化が得られない sparse_matrix_prod を使った方が速い場合があります。\n 転置 2 回のコストと src2 の 0 の多さのバランスをよく考慮してください。 */ template IMAGE1 sparse_matrix_prod2_transed( const IMAGE1 & transed_src1, const IMAGE2 & src2 ) { typedef typename IMAGE1::size_type size_type; typedef typename IMAGE1::value_type value_type; const int has_simd = is_scalar_madd_simd_enabled( value_type() ); const size_type w = src2.width(), h = transed_src1.width(); const size_type w2 = src2.width(), h2 = src2.height(); IMAGE1 transed_dst(h, w); if ( has_simd ) { for ( size_type y = 0; y < h2; y++ ) for ( size_type x = 0; x < w2; x++ ) { const value_type v = src2(x, y); if ( v ) { scalar_madd_simd( transed_src1.at(0, y).get(), transed_src1.at(h, y).get(), v, transed_dst.at(0, x).get() ); } } } else { for ( size_type y = 0; y < h2; y++ ) for ( size_type x = 0; x < w2; x++ ) { const value_type v = src2(x, y); if ( v ) { scalar_madd( transed_src1.at(0, y).get(), transed_src1.at(h, y).get(), v, transed_dst.at(0, x).get() ); } } } return transed_dst; } } // namespace yoffy #endif // YOF_IMAGEUTY