Eliot Miranda uploaded a new version of VMMaker to project VM Maker: http://source.squeak.org/VMMaker/VMMaker.oscog-eem.2284.mcz ==================== Summary ==================== Name: VMMaker.oscog-eem.2284 Author: eem Time: 28 November 2017, 12:33:18.151794 pm UUID: 9a2dfc11-2654-41d3-a83f-9a98cbc9ce2a Ancestors: VMMaker.oscog-nice.2283 BitBltSimulation>>copyLoop: Strength reduce the loop split for combinationRule = 3 so that fewer tests are done in the inner loop and so that when combinationRule = 3 mergeFn is never used. Use an explicit type declaration for unskew instead of relying on trickier type inference. Eliminate bogus use of #== for numeric comparison. =============== Diff against VMMaker.oscog-nice.2283 =============== Item was changed: ----- Method: BitBltSimulation>>copyLoop (in category 'inner loop') ----- copyLoop | prevWord thisWord skewWord halftoneWord mergeWord hInc y unskew skewMask notSkewMask mergeFnwith destWord | "This version of the inner loop assumes noSource = false." + "unskew is a bitShift and MUST remain signed, while skewMask is unsigned." mergeFnwith := self cCoerce: (opTable at: combinationRule+1) to: 'unsigned int (*)(unsigned int, unsigned int)'. mergeFnwith. "null ref for compiler" hInc := hDir*4. "Byte delta" "degenerate skew fixed for Sparc. 10/20/96 ikp" + skew = -32 + ifTrue: [skew := unskew := skewMask := 0] - skew == -32 - ifTrue: - ["Beware: separate skewMask initialization to avoid bad type inference. - Indeed, unskew is a bitShift and MUST remain signed, while skewMask is unsigned" - skew := unskew := 0. - skewMask := 0] ifFalse: [skew < 0 ifTrue: [unskew := skew+32. skewMask := AllOnes << (0-skew)] ifFalse: [skew = 0 ifTrue: [unskew := 0. skewMask := AllOnes] ifFalse: [unskew := skew-32. skewMask := AllOnes >> skew]]]. notSkewMask := skewMask bitInvert32. noHalftone ifTrue: [halftoneWord := AllOnes. halftoneHeight := 0] ifFalse: [halftoneWord := self halftoneAt: 0]. y := dy. 
+ "here is the vertical loop, in two versions, one for the combinationRule = 3 copy mode, one for the general case." + combinationRule = 3 + ifTrue: + [1 to: bbH do: "here is the vertical loop for combinationRule = 3 copy mode; no need to call merge" - 1 to: bbH do: "here is the vertical loop" [ :i | halftoneHeight > 1 ifTrue: "Otherwise, its always the same" [halftoneWord := self halftoneAt: y. y := y + vDir]. preload ifTrue: ["load the 64-bit shifter" prevWord := self srcLongAt: sourceIndex. self incSrcIndex: hInc] ifFalse: [prevWord := 0]. "Note: the horizontal loop has been expanded into three parts for speed:" "This first section requires masking of the destination store..." destMask := mask1. thisWord := self srcLongAt: sourceIndex. "pick up next word" self incSrcIndex: hInc. skewWord := ((prevWord bitAnd: notSkewMask) bitShift: unskew) bitOr: "32-bit rotate" ((thisWord bitAnd: skewMask) bitShift: skew). prevWord := thisWord. destWord := self dstLongAt: destIndex. + destWord := (destMask bitAnd: (skewWord bitAnd: halftoneWord)) bitOr: - mergeWord := self mergeFn: (skewWord bitAnd: halftoneWord) with: destWord. - destWord := (destMask bitAnd: mergeWord) bitOr: (destWord bitAnd: destMask bitInvert32). self dstLongAt: destIndex put: destWord. self incDestIndex: hInc. "This central horizontal loop requires no store masking" destMask := AllOnes. + (skew = 0) & (halftoneWord = AllOnes) - combinationRule = 3 - ifTrue: [(skew = 0) & (halftoneWord = AllOnes) ifTrue: ["Very special inner loop for STORE mode with no skew -- just move words" hDir = -1 ifTrue: ["Woeful patch: revert to older code for hDir = -1" 2 to: nWords-1 do: [ :word | thisWord := self srcLongAt: sourceIndex. self incSrcIndex: hInc. self dstLongAt: destIndex put: thisWord. self incDestIndex: hInc]] ifFalse: [2 to: nWords-1 do: [ :word | "Note loop starts with prevWord loaded (due to preload)" self dstLongAt: destIndex put: prevWord. self incDestIndex: hInc. prevWord := self srcLongAt: sourceIndex. 
self incSrcIndex: hInc]]] ifFalse: ["Special inner loop for STORE mode -- no need to call merge" 2 to: nWords-1 do: [ :word | thisWord := self srcLongAt: sourceIndex. self incSrcIndex: hInc. skewWord := ((prevWord bitAnd: notSkewMask) bitShift: unskew) bitOr: "32-bit rotate" ((thisWord bitAnd: skewMask) bitShift: skew). prevWord := thisWord. self dstLongAt: destIndex put: (skewWord bitAnd: halftoneWord). + self incDestIndex: hInc]]. + + "This last section, if used, requires masking of the destination store..." + nWords > 1 ifTrue: + [destMask := mask2. + thisWord := self srcLongAt: sourceIndex. "pick up next word" + self incSrcIndex: hInc. + skewWord := ((prevWord bitAnd: notSkewMask) bitShift: unskew) + bitOr: "32-bit rotate" + ((thisWord bitAnd: skewMask) bitShift: skew). + destWord := self dstLongAt: destIndex. + destWord := (destMask bitAnd: (skewWord bitAnd: halftoneWord)) bitOr: + (destWord bitAnd: destMask bitInvert32). + self dstLongAt: destIndex put: destWord. + self incDestIndex: hInc]. + + self incSrcIndex: sourceDelta. + self incDestIndex: destDelta]] + ifFalse: + [1 to: bbH do: "here is the vertical loop for the general case (combinationRule ~= 3)" + [ :i | + halftoneHeight > 1 ifTrue: "Otherwise, its always the same" + [halftoneWord := self halftoneAt: y. + y := y + vDir]. + preload ifTrue: + ["load the 64-bit shifter" + prevWord := self srcLongAt: sourceIndex. + self incSrcIndex: hInc] + ifFalse: + [prevWord := 0]. + + "Note: the horizontal loop has been expanded into three parts for speed:" + + "This first section requires masking of the destination store..." + destMask := mask1. + thisWord := self srcLongAt: sourceIndex. "pick up next word" + self incSrcIndex: hInc. + skewWord := ((prevWord bitAnd: notSkewMask) bitShift: unskew) + bitOr: "32-bit rotate" + ((thisWord bitAnd: skewMask) bitShift: skew). + prevWord := thisWord. + destWord := self dstLongAt: destIndex. + mergeWord := self mergeFn: (skewWord bitAnd: halftoneWord) with: destWord. 
+ destWord := (destMask bitAnd: mergeWord) bitOr: + (destWord bitAnd: destMask bitInvert32). + self dstLongAt: destIndex put: destWord. + self incDestIndex: hInc. + + "This central horizontal loop requires no store masking" + destMask := AllOnes. + 2 to: nWords-1 do: "Normal inner loop does merge:" - self incDestIndex: hInc]] - ] ifFalse: [2 to: nWords-1 do: "Normal inner loop does merge:" [ :word | thisWord := self srcLongAt: sourceIndex. "pick up next word" self incSrcIndex: hInc. skewWord := ((prevWord bitAnd: notSkewMask) bitShift: unskew) bitOr: "32-bit rotate" ((thisWord bitAnd: skewMask) bitShift: skew). prevWord := thisWord. mergeWord := self mergeFn: (skewWord bitAnd: halftoneWord) with: (self dstLongAt: destIndex). self dstLongAt: destIndex put: mergeWord. + self incDestIndex: hInc]. - self incDestIndex: hInc] - ]. "This last section, if used, requires masking of the destination store..." nWords > 1 ifTrue: [destMask := mask2. thisWord := self srcLongAt: sourceIndex. "pick up next word" self incSrcIndex: hInc. skewWord := ((prevWord bitAnd: notSkewMask) bitShift: unskew) bitOr: "32-bit rotate" ((thisWord bitAnd: skewMask) bitShift: skew). destWord := self dstLongAt: destIndex. mergeWord := self mergeFn: (skewWord bitAnd: halftoneWord) with: destWord. destWord := (destMask bitAnd: mergeWord) bitOr: (destWord bitAnd: destMask bitInvert32). self dstLongAt: destIndex put: destWord. self incDestIndex: hInc]. self incSrcIndex: sourceDelta. + self incDestIndex: destDelta]]! - self incDestIndex: destDelta]!