function DelAllSpace(var str: string): string; var i: integer; begin i := 1; repeat i := pos(' ', str, i); delete(str, i, 1); until i = 0; Result := str; end; 应该不慢,单元system的 函数pos和delete 在32位是汇编优化
----------------------------------------------
http://blog.sina.com.cn/bmsrnote
function DelAllSpace(var str: string): string; var i: integer; begin i := 1; repeat i := pos(' ', str, i); delete(str, i, 1); until i = 0; Result := str; end; 应该不慢,单元system的 函数pos和delete 在32位是汇编优化
T128 := mul64To128(AHi, BLo); Result := T128 shl 64;
T128 := mul64To128(ALo, BHi); Result := Result + (T128 shl 64);
T128 := mul64To128(ALo, BLo); Result := Result + T128; end; function U128Intdiv(a, b: uint128): uint128; {$IFDEF CPUX86} asm // push ecx // call uint128.bsr // pop ecx // mov [ecx],eax // mov [ecx+4],0 // mov [ecx+8],0 // mov [ecx+12],0 end; {$ENDIF CPUX86} {$IFDEF CPUX64} asm //[rdx]->a, [r8]->b,[rcx]->result PUSH RBX PUSH RSI PUSH RDI mov r9,rcx //result<->r9 MOV RBX,[r8] //b.lo->rbx MOV R8,[r8+8] //b.hi-r8 OR R8,R8 JNZ @@slow_ldiv //both high words are zero mov rax,[rdx] //a.lo->rax mov rdx,[rdx+8] //a.hi->rdx OR rdx,rdx JZ @@quick_ldiv OR RBX,RBX JZ @@quick_ldiv //if RCX:RBX == 0 force a zero divide @@slow_ldiv: MOV RCX,128 //shift counter XOR RDI,RDI //fake a 64 bit dividend XOR RSI,RSI //
@@xloop: SHL RAX,1 //shift dividend left one bit RCL RDX,1 RCL RSI,1 RCL RDI,1 CMP RDI,R8 //dividend larger? JB @@nosub JA @@subtract CMP RSI,RBX //maybe JB @@nosub @@subtract: SUB RSI,RBX SBB RDI,R8 //subtract the divisor INC RAX //build quotient @@nosub: LOOP @@xloop // //When done with the loop the four registers values' look like: // //| RDI | RSI | RDX | RAX | //| remainder | quotient | // //MOV RAX,RSI //MOV RDX,RDI //use remainder jmp @@finish
@@quick_ldiv: DIV RBX //unsigned divide //XCHG RAX,RDX XOR RDX,RDX @@finish: mov [r9],rax mov [r9+8],rdx POP RDI POP RSI POP RBX end; {$ENDIF CPUX64}
function U128IntdivPas(a, D: uint128): uint128; function IntDivU128(aa, dd: uint128): uint128; var c: integer; U, t: uint128; begin U := 0; t := 0; Result := 0; case CompareValue(aa, dd) of EqualsValue: begin Result := 1; exit; end; LessThanValue: exit; else begin Result := 1; c := UintsHighBit(aa) - UintsHighBit(dd); Result := Result shl c; U := dd shl c; if CompareValue(aa, U) = LessThanValue then begin U := U shr 1; Result := Result shr 1; end; t := aa - U; Result := Result + IntDivU128(t, dd); end; end; end;
begin Result := IntDivU128(a, D); end;
function U128MOD(a, b: uint128): uint128; {$IFDEF CPUX86} //[eax]->a, [edx]->b , [ecx]->result asm Pxor xmm0,xmm0 MOVDQU [ecx],xmm0 //result:=0 push ebp push ebx push esi push edi sub esp,64 mov ebp,esp mov esi,eax //@a <-> [esi] mov edi,edx //@b <-> [edi] mov [ebp],ecx //@result <-> [ebp] mov ebx,ebp add ebx, 20 //@tmpb <-> [ebx] mov esi,ebp add esi,40 //@tmpa:=esi MOVDQU xmm0,[eax] MOVDQU [esi],xmm0 //tmpa:=a mov eax,edi call uint128.bsr mov [ebp+8],eax //b.bsr <-> [ebp+8] @@a_bsr: mov eax,esi call uint128.bsr mov [ebp+4],eax //a.bsr <-> [ebp+4] cmp eax,32 ja @@a_Compare_b cmp [ebp+8],32 ja @@a_Compare_b mov eax, [esi] //a.lo-> eax mov edx,[esi+4]//a.hi-> edx div [edi] //div b mov ecx,[ebp] mov [ecx],edx //mod -> result.0qw jmp @@end @@a_Compare_b: mov eax,[esi+12] cmp eax,[edi+12] ja @@dayu jb @@xiaoyu mov eax,[esi+8] cmp eax,[edi+8] ja @@dayu jb @@xiaoyu mov eax,[esi+4] cmp eax,[edi+4] ja @@dayu jb @@xiaoyu mov eax,[esi] cmp eax,[edi] ja @@dayu jb @@xiaoyu @@dengyu: Pxor xmm0,xmm0 mov ecx,[ebp] MOVDQU [ecx],xmm0 //if a=b then result:=0 jmp @@end @@xiaoyu: mov ecx,[ebp] MOVDQU xmm0,[esi] MOVDQU [ecx],xmm0 //result:=a jmp @@end //if a<b then result:=a @@dayu: mov eax,edi //u128shr(b,count) b<-> [edx] count<->[ecx] 参数传递 b:eax, count:edx ,result:ecx mov edx,[ebp+4] sub edx,[ebp+8] mov ecx,ebx call U128shl //tmpb:= b shl (a.bsr-b.bsr) mov eax,[esi+12] cmp eax,[ebx+12] jb @@tmpb_shr_1 ja @@a_sub_tmpb mov eax,[esi+8] cmp eax,[ebx+8] jb @@tmpb_shr_1 ja @@a_sub_tmpb mov eax,[esi+4] cmp eax,[ebx+4] jb @@tmpb_shr_1 ja @@a_sub_tmpb mov eax,[esi] cmp eax,[ebx] jb @@tmpb_shr_1 ja @@a_sub_tmpb jmp @@dengyu @@tmpb_shr_1: shr [ebx+12],1 rcr [ebx+8],1 rcr [ebx+4],1 rcr [ebx],1 @@a_sub_tmpb: mov eax,[ebx] //a:=a-tmpb sub [esi], eax mov eax,[ebx+4] sbb [esi+4],eax mov eax,[ebx+8] sbb [esi+8],eax mov eax,[ebx+12] sbb [esi+12],eax jmp @@a_bsr @@end: add esp,64 pop edi pop esi pop ebx pop ebp end; {$ENDIF CPUX86} {$IFDEF CPUX64} asm //[rdx]->a, [r8]->b,[rcx]->result PUSH RBX PUSH RSI PUSH RDI mov r9,rcx //result<->r9 MOV RBX,[r8] //b.lo->rbx MOV R8,[r8+8] //b.hi-r8 OR R8,R8 JNZ @@slow_ldiv //both high words are zero mov rax,[rdx] //a.lo->rax mov rdx,[rdx+8] //a.hi->rdx OR rdx,rdx JZ @@quick_ldiv OR RBX,RBX JZ @@quick_ldiv //if RCX:RBX == 0 force a zero divide @@slow_ldiv: MOV RCX,128 //shift counter XOR RDI,RDI //fake a 64 bit dividend XOR RSI,RSI //
@@xloop: SHL RAX,1 //shift dividend left one bit RCL RDX,1 RCL RSI,1 RCL RDI,1 CMP RDI,R8 //dividend larger? JB @@nosub JA @@subtract CMP RSI,RBX //maybe JB @@nosub @@subtract: SUB RSI,RBX SBB RDI,R8 //subtract the divisor INC RAX //build quotient @@nosub: LOOP @@xloop // //When done with the loop the four registers values' look like: // //| RDI | RSI | RDX | RAX | //| remainder | quotient | // MOV RAX,RSI MOV RDX,RDI //use remainder jmp @@finish @@quick_ldiv: DIV RBX //unsigned divide XCHG RAX,RDX XOR RDX,RDX @@finish: mov [r9],rax mov [r9+8],rdx POP RDI POP RSI POP RBX end; {$ENDIF CPUX64}
function U128MODPas(a, M: uint128): uint128; function ModU128(aa, mm: uint128): uint128; var c, ac, mc: integer; U, t: uint128; begin U := 0; t := 0; Result := 0; mc := UintsHighBit(mm); case CompareValue(aa, mm) of EqualsValue: exit; LessThanValue: Result := aa; else begin ac := UintsHighBit(aa); if ac <= 64 then begin Result := Int128Rec(aa).Lo mod Int128Rec(mm).Lo; exit; end; c := ac - mc; U := mm shl c; if CompareValue(aa, U) = LessThanValue then U := U shr 1; t := aa - U; Result := ModU128(t, mm); end; end; //case end;
//好玩,我也写个命令行程序:将以下代码保存成MyClean.dpr,可以在D5~D2010中编译。 //代码未做多少优化,不过可以支持很大的文件,程序占用内存与文件大小无关。 program MyClean; {$APPTYPE CONSOLE} uses SysUtils, Windows, Classes;
function BuildParamList: TStrings; var i: Integer; begin Result := TStringList.Create; for i := 1 to ParamCount() do begin if Pos('=', ParamStr(i)) > 0 then begin Result.Add(ParamStr(i)); end; end; end;
procedure Clean(Source, Target: TStream); var bufSrc: AnsiString; bufTgt: AnsiString; i, k: Integer; len: Integer; tick: DWord; allTicks: DWord; begin SetLength(bufSrc, 4096 * 3); SetLength(bufTgt, 4096 * 3); allTicks := 0; repeat len := Source.Read(bufSrc[1], SizeOf(bufSrc)); i := 1; k := 1; tick := GetTickCount; while i <= len do begin if Byte(bufSrc[i]) <> Byte(VK_SPACE) then begin bufTgt[k] := bufSrc[i]; Inc(k); end; Inc(i); end; Inc(allTicks, GetTickCount - tick); if k > 1 then begin Target.Write(bufTgt[1], k -1); end; until len = 0; WriteLn(Format('Clean Time: %0.3fs', [allTicks / 1000.0])); end;
var i: Integer; begin // Insert user code here for i := 1 to ParamCount() do begin if SameText('/Gen', ParamStr(i)) then begin GenTestFile; Break; end else if SameText('/Clean', ParamStr(i)) then begin CleanSpace; Break; end; end; end.
----------------------------------------------
-广袤璀璨的银河,永无止境的梦想(梦无止境游银河) 博客挂了……
就楼主的列子来说,算法上还能不能更快呢?能,如果你是多核超线程cpu 你可以把s分段计算 在多个线程里 TThread.CreateAnonymousThread( procedure begin for i := 0 to MB div 4 -1 do move(s[3*i+1],s[2*i+1],size); end).start; TThread.CreateAnonymousThread( procedure begin for j := MB div 4 to MB div 4*2 -1 do move(s[3*j+1],s[2*j+1],size); end).start ...... 最后 Tenent.WaitFor 处理结果,估计4核44线程 以上将在1ms以内
----------------------------------------------
http://blog.sina.com.cn/bmsrnote
function BuildParamList: TStrings; var i: Integer; begin Result := TStringList.Create; for i := 1 to ParamCount() do begin if Pos('=', ParamStr(i)) > 0 then begin Result.Add(ParamStr(i)); end; end; end;
procedure Clean(Source, Target: TStream); var bufSrc: AnsiString; bufTgt: AnsiString; i, k: Integer; len: Integer; tick: DWord; allTicks: DWord; begin SetLength(bufSrc, 4096 * 3); SetLength(bufTgt, 4096 * 3); allTicks := 0; repeat len := Source.Read(bufSrc[1], Length(bufSrc)); i := 1; k := 1; tick := GetTickCount; while i <= len do begin { 如果确定是两个字符后跟一个空格,可以直接用PWord^来赋值,速度更快一些 PWord(@bufTgt[k])^ := PWord(@bufSrc[i])^; Inc(i, 3); Inc(k, 2); } if Byte(bufSrc[i]) <> Byte(VK_SPACE) then begin bufTgt[k] := bufSrc[i]; Inc(k); end; Inc(i); //} end; Inc(allTicks, GetTickCount - tick); if k > 1 then begin Target.Write(bufTgt[1], k -1); end; until len = 0; WriteLn(Format('Clean Time: %0.3fs', [allTicks / 1000.0])); end;
var i: Integer; begin // Insert user code here for i := 1 to ParamCount() do begin if SameText('/Gen', ParamStr(i)) then begin GenTestFile; Break; end else if SameText('/Clean', ParamStr(i)) then begin CleanSpace; Break; end; end; end.
----------------------------------------------
-广袤璀璨的银河,永无止境的梦想(梦无止境游银河) 博客挂了……
[quote]就楼主的列子来说,算法上还能不能更快呢?能,如果你是多核超线程cpu 你可以把s分段计算 在多个线程里 TThread.CreateAnonymousThread( procedure begin for i := 0 to MB div 4 -1 do move(s[3*i+1],s[2*i+1],size); end).start; TThread.CreateAnonymousThread( procedure begin for j := MB div 4 to MB div 4*2 -1 do move(s[3*j+1],s[2*j+1],size); end).start ...... 最后 Tenent.WaitFor 处理结果,估计4核44线程 以上将在1ms以内 [/quote] 您又没搞清楚情况吧?不知道您的“处理结果”是基于怎样的数据和硬件,多大数据量,多大 L2/L3 cache。一个简单的内存读写工作,瓶颈根本就不在 CPU 而是内存,甭管几核几线程都有 RAM 速度在那卡着,频繁切线程切 cache 结果是典型的滥用线程导致速度变慢,要不您试试?
----------------------------------------------
cnblogs中我写的关于Delphi的blog,欢迎访问: http://www.cnblogs.com/egust/
const MB = 1024 * 1024 ; threadcount=2; var s, ss: string; I: integer; Tick: Cardinal; len,count: integer; events: array of TLightweightEvent; begin
s := ''; Randomize; Memo_Output.Lines.Add('----------开始产生60m长字符串----------'); Tick := GetTickCount; for I := 0 to MB - 1 do s := s + Format('%.2x ', [Random(256)]); Tick := GetTickCount - Tick; // Memo_Output.Lines.Add(s); Memo_Output.Lines.Add(Format('产生字串s: Size: %d, 耗费时间: %d 毫秒', [strsize(s), Tick])); Memo_Output.Lines.Add('----------开始删除1m长字符串的空格----------'); ss := s; //保存s len := length(s) * 2 div 3;
Tick := GetTickCount; for I := 0 to MB - 1 do move(s[3 * I + 1], s[2 * I + 1], 2 * StringElementSize(s)); setlength(s, len); Tick := GetTickCount - Tick; // Memo_Output.Lines.Add(s); Memo_Output.Lines.Add(Format('删除字串s空格: Size: %d, 耗费时间: %d 毫秒', [strsize(s), Tick])); //多线程删除空格 s := ss; //恢复s setlength(events, threadcount); count:= MB div threadcount; try // Tick := GetTickCount; for I := 0 to threadcount - 1 do begin events[I] := TLightweightEvent.Create; events[I].ResetEvent; end; Tick := GetTickCount;
TThread.CreateAnonymousThread( procedure var I: integer; begin for I := 0 to count - 1 do move(s[3 * I + 1], s[2 * I + 1], 2 * StringElementSize(s)); events[0].SetEvent; end).Start; TThread.CreateAnonymousThread( procedure var I: integer; begin for I := count to 2*count - 1 do move(s[3 * I + 1], s[2 * I + 1], 2 * StringElementSize(s)); events[1].SetEvent; end).Start; // // TThread.CreateAnonymousThread( // procedure // var // I: integer; // begin // for I := 2*count to 3*count - 1 do move(s[3 * I + 1], s[2 * I + 1], 2 * StringElementSize(s)); // events[2].SetEvent; // end).Start; // TThread.CreateAnonymousThread( // procedure // var // I: integer; // begin // for I := 3*count to 4*count - 1 do move(s[3 * I + 1], s[2 * I + 1], 2 * StringElementSize(s)); // events[3].SetEvent; // end).Start; // // TThread.CreateAnonymousThread( // procedure // var // I: integer; // begin // for I := 4*count to 5*count - 1 do move(s[3 * I + 1], s[2 * I + 1], 2 * StringElementSize(s)); // events[4].SetEvent; // end).Start; // TThread.CreateAnonymousThread( // procedure // var // I: integer; // begin // for I := 5*count to 6*count - 1 do move(s[3 * I + 1], s[2 * I + 1], 2 * StringElementSize(s)); // events[5].SetEvent; // end).Start;
for i := 0 to threadcount-1 do if events[i].WaitFor(3000) <> wrSignaled then begin Memo_Output.Lines.Add(inttostr(i)+'号线程3秒超时或错误'); exit; end; setlength(s, len); Tick := GetTickCount - Tick; // Memo_Output.Lines.Add(s); Memo_Output.Lines.Add(Format(inttostr(threadcount)+'线程删除字串s空格: Size: %d, 耗费时间: %d 毫秒', [strsize(s), Tick])); finally for I := 0 to High(events) do events[I].Free; setlength(events, 0); end;
----------------------------------------------
http://blog.sina.com.cn/bmsrnote
var I, Ticks: Cardinal; S, D: PByte; begin with TMemoryStream.Create do try LoadFromFile('C:\Users\凡\Desktop\1.txt'); S := PByte(Memory) + 3; D := PByte(Memory) + 2; Ticks := GetTickCount; for I := 1 to Size div 3 do begin Move(S^, D^, 2); Inc(S, 3); Inc(D, 2); end; Size := Size div 3 * 2; Ticks := GetTickCount - Ticks; Caption := IntToStr(Ticks); SaveToFile('C:\Users\凡\Desktop\2.txt'); finally Free; end; end;
----------------------------------------------
我的方法很简单,应该也不太慢: function strip_space(const sstr:string):string; var rp,p:PChar; len,i:Integer; begin len:=length(sstr); rp:=GetMem(len); try p:=rp; for i:=1 to len do begin if sstr[i]<>#32 then begin p^:=sstr[i]; inc(p); end; end; result:=rp; finally FreeMem(rp); end; end;
----------------------------------------------
-
写字板的评测不准确,因其界面的响应有 1-2 秒延迟,所谓界面的响应指的是打开文件后,按下 CTRL + END 键让输入焦点转移到文件后.这方面记事本没有任何延迟.
DELPHI POS 程序程序代码如下:
procedure TForm1.btn1Click(Sender: TObject); var t: Cardinal; s: string; sl: TStringlist; begin sl := Tstringlist.Create; sl.LoadFromFile('C:\Users\Ruralist\Desktop\123.txt'); s := sl.Text; sl.free; t := GetTickCount; if pos('"要查询的字符串!"', s) > 0 then caption := IntToStr(GetTickCount - t);
end;
POS 函数源码如下:
// 看不懂:)
procedure _Pos{ substr : ShortString; s : ShortString ) : Integer}; asm { ->EAX Pointer to substr } { EDX Pointer to string } { <-EAX Position of substr in s or 0 }
PUSH EBX PUSH ESI PUSH EDI
MOV ESI,EAX { Point ESI to substr } MOV EDI,EDX { Point EDI to s }
XOR ECX,ECX { ECX = Length(s) } MOV CL,[EDI] INC EDI { Point EDI to first char of s }
PUSH EDI { remember s position to calculate index }
XOR EDX,EDX { EDX = Length(substr) } MOV DL,[ESI] INC ESI { Point ESI to first char of substr }
DEC EDX { EDX = Length(substr) - 1 } JS @@fail { < 0 ? return 0 } MOV AL,[ESI] { AL = first char of substr } INC ESI { Point ESI to 2'nd char of substr }
SUB ECX,EDX { #positions in s to look at } { = Length(s) - Length(substr) + 1 } JLE @@fail @@loop: REPNE SCASB JNE @@fail MOV EBX,ECX { save outer loop counter } PUSH ESI { save outer loop substr pointer } PUSH EDI { save outer loop s pointer }
MOV ECX,EDX REPE CMPSB POP EDI { restore outer loop s pointer } POP ESI { restore outer loop substr pointer } JE @@found MOV ECX,EBX { restore outer loop counter } JMP @@loop
@@fail: POP EDX { get rid of saved s pointer } XOR EAX,EAX JMP @@exit
@@found: POP EDX { restore pointer to first char of s } MOV EAX,EDI { EDI points of char after match } SUB EAX,EDX { the difference is the correct index } @@exit: POP EDI POP ESI POP EBX end;
----------------------------------------------
我写了个纯pas的pos函数,麻烦楼上测一下比汇编慢多少 function posX(const subStr, st: string; iStart: Integer): Integer; var i, j, k: Integer; len1, len2: Integer; begin if (subStr = '') or (iStart <= 0) then begin Result := 0; exit; end;
len1 := Length(subStr); len2 := Length(st);
for j := iStart to len2 do begin k := j; for i := 1 to len1 do begin if subStr[i] <> st[k] then break; inc(k); if k > len2 then Break; end;
if i = len1 then begin exit(j); end; end;
Result := 0; end;
----------------------------------------------
-
function PosEx(const SubStr, S: string; Offset: Cardinal = 1): Integer; var I,X: Integer; Len, LenSubStr: Integer; begin if Offset = 1 then Result := Pos(SubStr, S) else begin I := Offset; LenSubStr := Length(SubStr); Len := Length(S) - LenSubStr + 1; while I <= Len do begin if S[I] = SubStr[1] then begin X := 1; while (X < LenSubStr) and (S[I + X] = SubStr[X + 1]) do Inc(X); if (X = LenSubStr) then begin Result := I; exit; end; end; Inc(I); end; Result := 0; end; end;
----------------------------------------------