Optimize SIMD, add new line drawing function
diff --git a/src/base/ftobjs.c b/src/base/ftobjs.c
index 1c8e6e1..82113e3 100644
--- a/src/base/ftobjs.c
+++ b/src/base/ftobjs.c
@@ -3154,7 +3154,7 @@
face->garray = (FT_GlyphSlot*)malloc(
face->driver->clazz->slot_object_size * face->num_glyphs );
//error = FT_Set_Char_Size( face, 0, 160 * 64, 300, 300 );
- error = FT_Set_Pixel_Sizes( face, 0, 100);
+ error = FT_Set_Pixel_Sizes( face, 0, 500);
// int glyph_index = FT_Get_Char_Index( face, 'A' );
// error = FT_Load_Glyph( face, glyph_index, FT_LOAD_NO_HINTING );
diff --git a/src/dense/ftdense.c b/src/dense/ftdense.c
index 54d2783..e36dc08 100644
--- a/src/dense/ftdense.c
+++ b/src/dense/ftdense.c
@@ -80,25 +80,35 @@
static int
dense_line_to( const FT_Vector* to, dense_worker* worker )
{
- dense_render_line( worker, UPSCALE( to->x ), UPSCALE( to->y ) );
+ dense_render_line( worker, worker->prev_x, worker->prev_y, UPSCALE( to->x ), UPSCALE( to->y ) );
dense_move_to( to, worker );
return 0;
}
void
-dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy )
+dense_render_line( dense_worker* worker, FT_Pos fromx, FT_Pos fromy, FT_Pos tox, FT_Pos toy )
+{
+ return;
+}
+
+
+void
+dense_render_line2( dense_worker* worker, FT_PreLine pl )
{
//printf("Line from %d, %d to %d, %d\n", worker->prev_x, worker->prev_y, tox, toy);
- FT26D6 fx = worker->prev_x>>2;
- FT26D6 fy = worker->prev_y>>2;
+ // FT26D6 fx = worker->prev_x>>2;
+ // FT26D6 fy = worker->prev_y>>2;
+
+ FT26D6 fx = UPSCALE(pl->x1)>>2;
+ FT26D6 fy = UPSCALE(pl->y1)>>2;
FT26D6 from_x = fx;
FT26D6 from_y = fy;
- FT26D6 tx = tox>>2;
- FT26D6 ty = toy>>2;
+ FT26D6 tx = UPSCALE(pl->x2)>>2;
+ FT26D6 ty = UPSCALE(pl->y2)>>2;
if ( fy == ty )
return;
@@ -342,7 +352,7 @@
if ( devsq < 0.333f )
{
- dense_render_line( worker, aP3.x, aP3.y );
+ dense_render_line( worker, worker->prev_x, worker->prev_y, aP3.x, aP3.y );
return;
}
@@ -357,7 +367,7 @@
FT_Vector a = Lerp( t, Lerp( t, aP0, aP1 ), Lerp( t, aP1, aP2 ) );
FT_Vector b = Lerp( t, Lerp( t, aP1, aP2 ), Lerp( t, aP2, aP3 ) );
FT_Vector next = Lerp( t, a, b );
- dense_render_line( worker, next.x, next.y );
+ dense_render_line( worker, worker->prev_x, worker->prev_y, next.x, next.y );
worker->prev_x = next.x;
worker->prev_y = next.y;
p = next;
@@ -423,22 +433,17 @@
{
// FT_Error error = FT_Outline_Decompose( &( worker->outline ),
// &dense_decompose_funcs, worker );
- FT_Vector point1 = {pl->x1, pl->y1};
- FT_Vector point2 = {100, 100};
+ // FT_Vector point1 = {pl->x1, pl->y1};
- FT_Error error = dense_move_to(&point1, worker);
+ FT_Error error = 0;
while (pl!=NULL)
{
- point1.x = pl->x1;
- point1.y = pl->y1;
- point2.x = pl->x2;
- point2.y = pl->y2;
+ dense_render_line2(worker, pl);
- if(pl->ismove){
- dense_move_to(&point2, worker);
- }else{
- dense_line_to(&point2, worker);
- }
+
+ // worker->prev_x = UPSCALE(pl->x2);
+ // worker->prev_y = UPSCALE(pl->y2);
+ //dense_line_to(&point2, worker);
pl= pl->next;
}
// point.x = 100;
@@ -493,7 +498,8 @@
// cap max value to 1
//y = _mm_min_epi32( _mm_srli_epi32( y, 4 ), _mm_set1_epi32( 255 ) );
- __m128i y = _mm_abs_epi32(_mm_srai_epi32( x , 4 ));
+ //__m128i y = _mm_abs_epi32(_mm_srai_epi32( x , 4 ));
+ __m128i y = _mm_srli_epi32( _mm_abs_epi32( x) , 4 );
// reduce to 255
// y =
@@ -505,7 +511,8 @@
//__m128i z = _mm_packus_epi16(_mm_packs_epi32(z, nzero), nzero);
// int* ptr = (int*)&dest[i];
- *(int*)&dest[i] = *(int*)&y;
+ _mm_storeu_si32(&dest[i], y);
+ //*(int*)&dest[i] = *(int*)&y;
//*(int*)&dest[i] = _mm_extract_epi32(y, 0);
//_mm_store_ss( (float*)&dest[i], _mm_castsi128_ps(y) );
@@ -527,7 +534,7 @@
value += *source++;
if(value > 0){
- int n = value >>4;
+ int n = value >>4;
if(n>255)n=255;
*dest = (unsigned char)n;
diff --git a/src/dense/ftdense.h b/src/dense/ftdense.h
index e3f8f19..1f75bc5 100644
--- a/src/dense/ftdense.h
+++ b/src/dense/ftdense.h
@@ -43,7 +43,7 @@
FT_Outline outline;
} dense_worker;
- void dense_render_line( dense_worker* worker, FT_Pos to_x, FT_Pos to_y );
+ void dense_render_line( dense_worker* worker, FT_Pos from_x, FT_Pos from_y, FT_Pos to_x, FT_Pos to_y );
void dense_render_quadratic( dense_worker* worker,
FT_Vector* control,
FT_Vector* to );