Friday, March 25, 2011

[OOPS] elevator private data for REQ_FLUSH requests

Commit
    9d5a4e946ce5352f19400b6370f4cd8e72806278
    block: skip elevator data initialization for flush requests

    Skip elevator initialization for flush requests by passing priv=0 to
    blk_alloc_request() in get_request().  As such elv_set_request() is
    never called for flush requests.

introduced a priv flag to skip elevator_private data initialization for block FLUSH
requests. This led to a NULL pointer dereference on my machine in cfq_insert_request,
which requires elevator_private to be set:

  1 [   78.982169] Call Trace:                                                                                                                                                                                                     
  2 [   78.982178]   cfq_insert_request+0x4e/0x47d
  3 [   78.982184]   ? do_raw_spin_lock+0x6b/0x122
  4 [   78.982189]   elv_insert+0x212/0x265
  5 [   78.982192]   __elv_add_request+0x50/0x52
  6 [   78.982195]   flush_plug_list+0xce/0x12f
  7 [   78.982199]   __blk_flush_plug+0x15/0x21
  8 [   78.982205]   schedule+0x43e/0xbea
  9 [   78.982211]   ? __lock_acquire+0x149d/0x1576
 10 [   78.982215]   ? drive_stat_acct+0x1b6/0x1c3
 11 [   78.982218]   ? drive_stat_acct+0x44/0x1c3
 12 [   78.982223]   ? __make_request+0x268/0x2bf
 13 [   78.982226]   schedule_timeout+0x35/0x3b8
 14 [   78.982230]   ? mark_held_locks+0x4b/0x6d
 15 [   78.982234]   ? _raw_spin_unlock_irq+0x28/0x56
 16 [   78.982239]   ? get_parent_ip+0xe/0x3e
 17 [   78.982244]   ? sub_preempt_count+0x90/0xa3
 18 [   78.982247]   wait_for_common+0xc3/0x141
 19 [   78.982251]   ? default_wake_function+0x0/0xf
 20 [   78.982254]   wait_for_completion+0x18/0x1a
 21 [   78.982258]   blkdev_issue_flush+0xcb/0x11a
 22 [   78.982264]   ext4_sync_file+0x2b3/0x302
 23 [   78.982268]   vfs_fsync_range+0x55/0x75
 24 [   78.982271]   generic_write_sync+0x3f/0x41
 25 [   78.982278]   generic_file_aio_write+0x8c/0xb9
 26 [   78.982281]   ext4_file_write+0x1dc/0x237
 27 [   78.982285]   ? do_raw_spin_lock+0x6b/0x122
 28 [   78.982288]   ? ext4_file_write+0x0/0x237
 29 [   78.982292]   do_sync_readv_writev+0xb4/0xf9
 30 [   78.982298]   ? security_file_permission+0x1e/0x84
 31 [   78.982302]   ? rw_verify_area+0xab/0xc8
 32 [   78.982305]   do_readv_writev+0xb8/0x17d
 33 [   78.982309]   ? fget_light+0x166/0x30b
 34 [   78.982312]   vfs_writev+0x40/0x42
 35 [   78.982315]   sys_pwritev+0x55/0x99
 36 [   78.982320]   system_call_fastpath+0x16/0x1b
 37 
 
My solution was to use the ELEVATOR_INSERT_FLUSH flag as an elv_insert parameter
for REQ_FLUSH | REQ_FUA requests (lkml)

---
 block/elevator.c |    2 ++
@@ -734,6 +734,8 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
    q->end_sector = rq_end_sector(rq);
    q->boundary_rq = rq;
   }
+ } else if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
+  where = ELEVATOR_INSERT_FLUSH;
  } else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
       where == ELEVATOR_INSERT_SORT)
   where = ELEVATOR_INSERT_BACK;



Jens Axboe came up with a more high-level solution:
 @@ -2702,7 +2702,10 @@ static void flush_plug_list(struct blk_plug *plug)
                /*
                 * rq is already accounted, so use raw insert
                 */
-               __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
+               if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
+                       __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
+               else
+                       __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
        }

        if (q) {


(flush_plug_list is one level higher than __elv_add_request)
 
Git pull request message says (lkml)
"Thanks a lot to the people involved with fixing the first issue." 
 
hope he was talking about me...


.

Thursday, March 24, 2011

g++ behind the static_cast

Assume that we have a simple C++ code

 const int i = static_cast<const int>(123.123f);

What is actually behind the static_cast?


The magic of static_cast starts in

tree
build_static_cast (tree type, tree expr, tsubst_flags_t complain)
{
  tree result;
  bool valid_p;

  if (type == error_mark_node || expr == error_mark_node)
    return error_mark_node;

  if (processing_template_decl)
    {
      expr = build_min (STATIC_CAST_EXPR, type, expr);
      /* We don't know if it will or will not have side effects.  */
      TREE_SIDE_EFFECTS (expr) = 1;
      return convert_from_reference (expr);
    }

  /* build_c_cast puts on a NOP_EXPR to make the result not an lvalue.
     Strip such NOP_EXPRs if VALUE is being used in non-lvalue context.  */
  if (TREE_CODE (type) != REFERENCE_TYPE
      && TREE_CODE (expr) == NOP_EXPR
      && TREE_TYPE (expr) == TREE_TYPE (TREE_OPERAND (expr, 0)))
    expr = TREE_OPERAND (expr, 0);

  result = build_static_cast_1 (type, expr, /*c_cast_p=*/false, &valid_p,
                                complain);
  if (valid_p)
    return result;

  if (complain & tf_error)
    error ("invalid static_cast from type %qT to type %qT",
           TREE_TYPE (expr), type);
  return error_mark_node;
}


There, for example, we can see g++'s "invalid static_cast ..." error message.

build_static_cast is a wrapper around build_static_cast_1, which, in turn,
is a big "switch", since it has to deal with all kinds of derived-to-base
casts, user-defined casts, type instantiation, etc.

static tree
build_static_cast_1 (tree type, tree expr, bool c_cast_p,
                     bool *valid_p, tsubst_flags_t complain)


For example:

   /* "An lvalue of type cv1 T1 can be cast to type rvalue reference to
      cv2 T2 if cv2 T2 is reference-compatible with cv1 T1 (8.5.3)."  */
   if (TREE_CODE (type) == REFERENCE_TYPE
       && TYPE_REF_IS_RVALUE (type)
       && real_lvalue_p (expr)
       && reference_related_p (TREE_TYPE (type), intype)
       && (c_cast_p || at_least_as_qualified_p (TREE_TYPE (type), intype)))
     {
       expr = build_typed_address (expr, type);
       return convert_from_reference (expr);
     }


From here we get to gcc/fold-const.c — a place where tons of interesting
stuff lives, for example fold_convert_loc. Which is, once again, a number
of "switches" (convert from INTEGER to CONST INTEGER, etc.):

Convert expression ARG to type TYPE.  Used by the middle-end for
simple conversions in preference to calling the front-end's convert.

 fold_convert_loc (location_t loc, tree type, tree arg)
[..]
   switch (TREE_CODE (type))
     {
     case POINTER_TYPE:
     case REFERENCE_TYPE:
       /* Handle conversions between pointers to different address spaces.  */
       if (POINTER_TYPE_P (orig)
       && (TYPE_ADDR_SPACE (TREE_TYPE (type))
           != TYPE_ADDR_SPACE (TREE_TYPE (orig))))
     return fold_build1_loc (loc, ADDR_SPACE_CONVERT_EXPR, type, arg);

or
     case INTEGER_TYPE: case ENUMERAL_TYPE: case BOOLEAN_TYPE:
     case OFFSET_TYPE:
       if (TREE_CODE (arg) == INTEGER_CST)
     {                                                             
       tem = fold_convert_const (NOP_EXPR, type, arg);
       if (tem != NULL_TREE)
         return tem;
     }



Since we're requesting a cast to const int, the right function to call
is fold_convert_const, which gets called from

 Fold a unary expression of code CODE and type TYPE with operand
 OP0.  Return the folded expression if folding is successful.
 Otherwise, return NULL_TREE.

 fold_unary_loc (location_t loc, enum tree_code code, tree type, tree op0)
[..]
   switch (code)
     {
     case PAREN_EXPR:
       /* Re-association barriers around constants and other re-association
      barriers can be removed.  */
       if (CONSTANT_CLASS_P (op0)
       || TREE_CODE (op0) == PAREN_EXPR)
     return fold_convert_loc (loc, type, op0);
       return NULL_TREE;


     CASE_CONVERT:
     case FLOAT_EXPR:
     case FIX_TRUNC_EXPR:
[..]
       tem = fold_convert_const (code, type, op0);
       return tem ? tem : NULL_TREE;



fold_convert_const decides what conversion exactly should be
performed (if any):

fold_convert_const (enum tree_code code, tree type, tree arg1)
[..]
   if (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
       || TREE_CODE (type) == OFFSET_TYPE)
     {
       if (TREE_CODE (arg1) == INTEGER_CST)
     return fold_convert_const_int_from_int (type, arg1);
       else if (TREE_CODE (arg1) == REAL_CST)
     return fold_convert_const_int_from_real (code, type, arg1);                                                             
       else if (TREE_CODE (arg1) == FIXED_CST)
     return fold_convert_const_int_from_fixed (type, arg1);
     }
[..]


which finally gets us close to the cast itself via the

real_to_integer2 (HOST_WIDE_INT *plow, HOST_WIDE_INT *phigh,
                  const REAL_VALUE_TYPE *r)


call in gcc/real.c

   The floating point model used internally is not exactly IEEE 754
   compliant, and close to the description in the ISO C99 standard,
   section 5.2.4.2.2 Characteristics of floating types.

   Specifically

        x = s * b^e * \sum_{k=1}^p f_k * b^{-k}

        where
                s = sign (+- 1)
                b = base or radix, here always 2
                e = exponent
                p = precision (the number of base-b digits in the significand)
                f_k = the digits of the significand.

   We differ from typical IEEE 754 encodings in that the entire
   significand is fractional.  Normalized significands are in the
   range [0.5, 1.0).

   A requirement of the model is that P be larger than the largest
   supported target floating-point type by at least 2 bits.  This gives
   us proper rounding when we truncate to the target type.  In addition,
   E must be large enough to hold the smallest supported denormal number
   in a normalized form.

   Both of these requirements are easily satisfied.  The largest target
   significand is 113 bits; we store at least 160.  The smallest
   denormal number fits in 17 exponent bits; we store 26.

   Note that the decimal string conversion routines are sensitive to
   rounding errors.  Since the raw arithmetic routines do not themselves
   have guard digits or rounding, the computation of 10**exp can
   accumulate more than a few digits of error.  The previous incarnation
   of real.c successfully used a 144-bit fraction; given the current
   layout of REAL_VALUE_TYPE we're forced to expand to at least 160 bits.




There is a bunch of fold_convert_* functions, e.g.:
fold_convert_const_int_from_int
fold_convert_const_int_from_real
fold_convert_const_int_from_fixed
fold_convert_const_real_from_real


and so on.

Wednesday, March 23, 2011

kvm: x86 instruction decoder/emulator

In case you've ever wondered about kvm emulation, the devs did
step-by-step decoding/emulation. Really outstanding and impressive:

arch/x86/kvm/emulate.c
x86_emulate_insn
[..]
3036     switch (c->b) {
[..]
3078     case 0x28 ... 0x2d:
3079           sub:      /* sub */
3080         emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
3081         break;
3082     case 0x30 ... 0x35:
3083           xor:      /* xor */
3084         emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
3085         break;
3086     case 0x38 ... 0x3d:
3087           cmp:      /* cmp */
3088         emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
3089         break;
3090     case 0x40 ... 0x47: /* inc r16/r32 */
3091         emulate_1op("inc", c->dst, ctxt->eflags);
3092         break;
3093     case 0x48 ... 0x4f: /* dec r16/r32 */
3094         emulate_1op("dec", c->dst, ctxt->eflags);
3095         break;
3096     case 0x58 ... 0x5f: /* pop reg */
3097     pop_instruction:
3098         rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
3099         break;
3100     case 0x60/* pusha */
3101         rc = emulate_pusha(ctxt, ops);
3102         break;
3103     case 0x61/* popa */
3104         rc = emulate_popa(ctxt, ops);
3105         break;
3106     case 0x63:      /* movsxd */
3107         if (ctxt->mode != X86EMUL_MODE_PROT64)
3108             goto cannot_emulate;
3109         c->dst.val = (s32) c->src.val;
3110         break;
3111     case 0x6c:      /* insb */
3112     case 0x6d:      /* insw/insd */
3113         c->src.val = c->regs[VCPU_REGS_RDX];
3114         goto do_io_in;
3115     case 0x6e:      /* outsb */
3116     case 0x6f:      /* outsw/outsd */
3117         c->dst.val = c->regs[VCPU_REGS_RDX];
3118         goto do_io_out;
3119         break;
3120     case 0x70 ... 0x7f: /* jcc (short) */
3121         if (test_cc(c->b, ctxt->eflags))
3122             jmp_rel(c, c->src.val);
3123         break;
3124     case 0x80 ... 0x83: /* Grp1 */
3125         switch (c->modrm_reg) {
3126         case 0:
3127             goto add;
3128         case 1:
3129             goto or;
3130         case 2:
3131             goto adc;
3132         case 3:
3133             goto sbb;
3134         case 4:
3135             goto and;
3136         case 5:
3137             goto sub;
3138         case 6:
3139             goto xor;
3140         case 7:
3141             goto cmp;
3142         }
3143         break;
3144     case 0x84 ... 0x85:
[..]



the whole platform... with sysenter/sysexit/syscall/etc.

emulate_sysenter
[..]
1650     /* inject #GP if in real mode */
1651     if (ctxt->mode == X86EMUL_MODE_REAL)
1652         return emulate_gp(ctxt, 0);
1653
1654     /* XXX sysenter/sysexit have not been tested in 64bit mode.
1655     * Therefore, we inject an #UD.
1656     */
1657     if (ctxt->mode == X86EMUL_MODE_PROT64)
1658         return emulate_ud(ctxt);
1659
1660     setup_syscalls_segments(ctxt, ops, &cs, &ss);
1661
1662     ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
1663     switch (ctxt->mode) {                                                      
1664     case X86EMUL_MODE_PROT32:
1665         if ((msr_data & 0xfffc) == 0x0)
1666             return emulate_gp(ctxt, 0);
1667         break;
1668     case X86EMUL_MODE_PROT64:
1669         if (msr_data == 0x0)
1670             return emulate_gp(ctxt, 0);
1671         break;
1672     }
[..]

Monday, March 21, 2011

gcc-4.6 RC1

rev 171232  
make -j8 -k check

libstdc++-v3/testsuite/libstdc++.sum
=== libstdc++ Summary ===

# of expected passes 7674
# of unexpected failures 16
# of expected failures 83
# of unresolved testcases 1
# of unsupported tests 392


gcc/testsuite/g++/g++.sum
=== g++ Summary ===

# of expected passes 27262
# of expected failures 161
# of unsupported tests 359
gcc/testsuite/g++/../../g++ version 4.6.0 20110320 (prerelease) (GCC)


gcc/testsuite/gcc/gcc.sum
=== gcc Summary ===

# of expected passes 78155
# of unexpected failures 31
# of unexpected successes 34
# of expected failures 230
# of unsupported tests 1087
gcc/xgcc version 4.6.0 20110320 (prerelease) (GCC)

Tuesday, March 1, 2011

llvm: sizeof for `primitive' types

If you ever wondered how llvm calculates sizeof for
`primitive' types (I did), target-independent types are hard-coded:

include/llvm/CodeGen/ValueTypes.h

  /// MVT - Machine Value Type.  Every type that is supported natively by some
  /// processor targeted by LLVM occurs here.  This means that any legal value
  /// type can be represented by a MVT.
  class MVT {
  public:
    enum SimpleValueType {
      // If you change this numbering, you must change the values in
      // ValueTypes.td as well!
      Other          =   0,   // This is a non-standard value
      i1             =   1,   // This is a 1 bit integer value
      i8             =   2,   // This is an 8 bit integer value
      i16            =   3,   // This is a 16 bit integer value
      i32            =   4,   // This is a 32 bit integer value
      i64            =   5,   // This is a 64 bit integer value
      i128           =   6,   // This is a 128 bit integer value
[..]

And here we go:
    unsigned getSizeInBits() const {
      switch (SimpleTy) {
      case iPTR:
        assert(0 && "Value type size is target-dependent. Ask TLI.");
      case iPTRAny:
      case iAny:
      case fAny:
        assert(0 && "Value type is overloaded.");
      default:
        assert(0 && "getSizeInBits called on extended MVT.");
      case i1  :  return 1;
      case i8  :  return 8;
      case i16 :
      case v2i8:  return 16;
      case f32 :
      case i32 :
      case v4i8:
      case v2i16: return 32;
      case x86mmx:
      case f64 :
      case i64 :
      case v8i8:
      case v4i16:
      case v2i32:
      case v1i64:
      case v2f32: return 64;
      case f80 :  return 80;
      case f128:
      case ppcf128:
      case i128:
      case v16i8:
      case v8i16:
      case v4i32:
      case v2i64:
      case v4f32:
      case v2f64: return 128;
      case v32i8:
      case v16i16:
      case v8i32:
      case v4i64:
      case v8f32:
      case v4f64: return 256;
      case v8i64: return 512;
      }
    }


`Primitive' target dependent types:
lib/VMCore/Type.cpp
unsigned Type::getPrimitiveSizeInBits() const {
  switch (getTypeID()) {
  case Type::FloatTyID: return 32;
  case Type::DoubleTyID: return 64;
  case Type::X86_FP80TyID: return 80;
  case Type::FP128TyID: return 128;
  case Type::PPC_FP128TyID: return 128;
  case Type::X86_MMXTyID: return 64;
  case Type::IntegerTyID: return cast(this)->getBitWidth();
  case Type::VectorTyID:  return cast(this)->getBitWidth();
  default: return 0;
  }
}

[..]

int Type::getFPMantissaWidth() const {
  if (const VectorType *VTy = dyn_cast(this))
    return VTy->getElementType()->getFPMantissaWidth();
  assert(isFloatingPointTy() && "Not a floating point type!");
  if (ID == FloatTyID) return 24;
  if (ID == DoubleTyID) return 53;
  if (ID == X86_FP80TyID) return 64;
  if (ID == FP128TyID) return 113;
  assert(ID == PPC_FP128TyID && "unknown fp type");
  return -1;
}


Target dependent types:

lib/Target/ARM/ARMAsmBackend.cpp


unsigned getPointerSize() const { return 4; }

lib/VMCore/Module.cpp
/// Target Pointer Size information...
Module::PointerSize Module::getPointerSize() const {
  StringRef temp = DataLayout;
  Module::PointerSize ret = AnyPointerSize;
 
  while (!temp.empty()) {
    StringRef token, signalToken;
    tie(token, temp) = getToken(temp, "-");
    tie(signalToken, token) = getToken(token, ":");
    
    if (signalToken[0] == 'p') {
      int size = 0;
      getToken(token, ":").first.getAsInteger(10, size);
      if (size == 32)
        ret = Pointer32;
      else if (size == 64)
        ret = Pointer64;
    }
  }
 
  return ret;
}



Where
lib/Support/StringRef.cpp

/// GetAsUnsignedInteger - Workhorse method that converts a integer character
/// sequence of radix up to 36 to an unsigned long long value.
static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
                                 unsigned long long &Result) {
  // Autosense radix if not specified.
  if (Radix == 0)
    Radix = GetAutoSenseRadix(Str);

  // Empty strings (after the radix autosense) are invalid.
  if (Str.empty()) return true;

  // Parse all the bytes of the string given this radix.  Watch for overflow.
  Result = 0;
  while (!Str.empty()) {
    unsigned CharVal;
    if (Str[0] >= '0' && Str[0] <= '9')
      CharVal = Str[0]-'0';
    else if (Str[0] >= 'a' && Str[0] <= 'z')
      CharVal = Str[0]-'a'+10;
    else if (Str[0] >= 'A' && Str[0] <= 'Z')
      CharVal = Str[0]-'A'+10;
    else
      return true;

    // If the parsed value is larger than the integer radix, the string is
    // invalid.
    if (CharVal >= Radix)
      return true;

    // Add in this character.
    unsigned long long PrevResult = Result;
    Result = Result*Radix+CharVal;

    // Check for overflow.
    if (Result < PrevResult)
      return true;

    Str = Str.substr(1);
  }

  return false;
}
(the same goes for GetAsSignedInteger)


I should say -- it's a pleasure to read llvm source code.