Performance differences... so dramatic?
Asked Answered
K

7

65

Just now I read some posts about List<T> vs LinkedList<T>, so I decided to benchmark some structures myself. I benchmarked Stack<T>, Queue<T>, List<T> and LinkedList<T> by adding data and removing data to/from the front/end. Here's the benchmark result:

               Pushing to Stack...  Time used:      7067 ticks
              Poping from Stack...  Time used:      2508 ticks

               Enqueue to Queue...  Time used:      7509 ticks
             Dequeue from Queue...  Time used:      2973 ticks

    Insert to List at the front...  Time used:   5211897 ticks
RemoveAt from List at the front...  Time used:   5198380 ticks

         Add to List at the end...  Time used:      5691 ticks
  RemoveAt from List at the end...  Time used:      3484 ticks

         AddFirst to LinkedList...  Time used:     14057 ticks
    RemoveFirst from LinkedList...  Time used:      5132 ticks

          AddLast to LinkedList...  Time used:      9294 ticks
     RemoveLast from LinkedList...  Time used:      4414 ticks

Code:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;

namespace Benchmarking
{
    static class Collections
    {
        public static void run()
        {
            Random rand = new Random();
            Stopwatch sw = new Stopwatch();
            Stack<int> stack = new Stack<int>();
            Queue<int> queue = new Queue<int>();
            List<int> list1 = new List<int>();
            List<int> list2 = new List<int>();
            LinkedList<int> linkedlist1 = new LinkedList<int>();
            LinkedList<int> linkedlist2 = new LinkedList<int>();
            int dummy;


            sw.Reset();
            Console.Write("{0,40}", "Pushing to Stack...");
            sw.Start();
            for (int i = 0; i < 100000; i++)
            {
                stack.Push(rand.Next());
            }
            sw.Stop();
            Console.WriteLine("  Time used: {0,9} ticks", sw.ElapsedTicks);
            sw.Reset();
            Console.Write("{0,40}", "Poping from Stack...");
            sw.Start();
            for (int i = 0; i < 100000; i++)
            {
                dummy = stack.Pop();
                dummy++;
            }
            sw.Stop();
            Console.WriteLine("  Time used: {0,9} ticks\n", sw.ElapsedTicks);


            sw.Reset();
            Console.Write("{0,40}", "Enqueue to Queue...");
            sw.Start();
            for (int i = 0; i < 100000; i++)
            {
                queue.Enqueue(rand.Next());
            }
            sw.Stop();
            Console.WriteLine("  Time used: {0,9} ticks", sw.ElapsedTicks);
            sw.Reset();
            Console.Write("{0,40}", "Dequeue from Queue...");
            sw.Start();
            for (int i = 0; i < 100000; i++)
            {
                dummy = queue.Dequeue();
                dummy++;
            }
            sw.Stop();
            Console.WriteLine("  Time used: {0,9} ticks\n", sw.ElapsedTicks);


            sw.Reset();
            Console.Write("{0,40}", "Insert to List at the front...");
            sw.Start();
            for (int i = 0; i < 100000; i++)
            {
                list1.Insert(0, rand.Next());
            }
            sw.Stop();
            Console.WriteLine("  Time used: {0,9} ticks", sw.ElapsedTicks);
            sw.Reset();
            Console.Write("{0,40}", "RemoveAt from List at the front...");
            sw.Start();
            for (int i = 0; i < 100000; i++)
            {
                dummy = list1[0];
                list1.RemoveAt(0);
                dummy++;
            }
            sw.Stop();
            Console.WriteLine("  Time used: {0,9} ticks\n", sw.ElapsedTicks);


            sw.Reset();
            Console.Write("{0,40}", "Add to List at the end...");
            sw.Start();
            for (int i = 0; i < 100000; i++)
            {
                list2.Add(rand.Next());
            }
            sw.Stop();
            Console.WriteLine("  Time used: {0,9} ticks", sw.ElapsedTicks);
            sw.Reset();
            Console.Write("{0,40}", "RemoveAt from List at the end...");
            sw.Start();
            for (int i = 0; i < 100000; i++)
            {
                dummy = list2[list2.Count - 1];
                list2.RemoveAt(list2.Count - 1);
                dummy++;
            }
            sw.Stop();
            Console.WriteLine("  Time used: {0,9} ticks\n", sw.ElapsedTicks);


            sw.Reset();
            Console.Write("{0,40}", "AddFirst to LinkedList...");
            sw.Start();
            for (int i = 0; i < 100000; i++)
            {
                linkedlist1.AddFirst(rand.Next());
            }
            sw.Stop();
            Console.WriteLine("  Time used: {0,9} ticks", sw.ElapsedTicks);
            sw.Reset();
            Console.Write("{0,40}", "RemoveFirst from LinkedList...");
            sw.Start();
            for (int i = 0; i < 100000; i++)
            {
                dummy = linkedlist1.First.Value;
                linkedlist1.RemoveFirst();
                dummy++;
            }
            sw.Stop();
            Console.WriteLine("  Time used: {0,9} ticks\n", sw.ElapsedTicks);


            sw.Reset();
            Console.Write("{0,40}", "AddLast to LinkedList...");
            sw.Start();
            for (int i = 0; i < 100000; i++)
            {
                linkedlist2.AddLast(rand.Next());
            }
            sw.Stop();
            Console.WriteLine("  Time used: {0,9} ticks", sw.ElapsedTicks);
            sw.Reset();
            Console.Write("{0,40}", "RemoveLast from LinkedList...");
            sw.Start();
            for (int i = 0; i < 100000; i++)
            {
                dummy = linkedlist2.Last.Value;
                linkedlist2.RemoveLast();
                dummy++;
            }
            sw.Stop();
            Console.WriteLine("  Time used: {0,9} ticks\n", sw.ElapsedTicks);
        }
    }
}

The differences are so dramatic!

As you can see, the performance of Stack<T> and Queue<T> are fast and comparable, that's expected.

For List<T>, using the front and the end has so much differences! And to my surprise, performance of adding/removing from the end is actually comparable to the performance of Stack<T>.

For LinkedList<T>, manipulating with the front is fast (-er than List<T>), but for the end, it is incredibly slow for removing manipulating with the end is too.


So... can any experts account on:

  1. the similarity in performance of using Stack<T> and the end of List<T>,
  2. the differences in using the front and the end of List<T>, and
  3. the reason that using the end of LinkedList<T> is so slow (not applicable as that is a coding error due to the use of Linq's Last(), thanks to CodesInChaos)?

I think I know why List<T> doesn't handle the front so well... because List<T>needs to move the whole list back and fro when doing that. Correct me if I am wrong.

P.S. My System.Diagnostics.Stopwatch.Frequency is 2435947, and the program is targeted to .NET 4 Client Profile and compiled with C# 4.0, on Windows 7 Visual Studio 2010.

Kanara answered 3/11, 2012 at 16:48 Comment(2)
No offense but you should first learn WHAT those data structures are and how complex (big O notation) is each operation on each data structure. Unfortunately MSDN won't help you there but it should't be problem to find enough info on Wikipedia.Hoffert
@EmperorOrionii Yes I understand those structures. I was just puzzled why a double-linked list takes such a long time to access the end but it turned out to be a coding error.Kanara
T
42

Concerning 1:

Stack<T>'s and List<T>'s performance being similar isn't surprising. I'd expect both of them to use arrays with a doubling strategy. This leads to amortized constant-time additions.

You can use List<T> everywhere you can use Stack<T>, but it leads to less expressive code.

Concerning 2:

I think I know why List<T> doesn't handle the front so well... because List<T> needs to move the whole list back and fro when doing that.

That's correct. Inserting/removing elements at the beginning is expensive because it moves all elements. Getting or replacing elements at the beginning on the other hand is cheap.

Concerning 3:

Your slow LinkedList<T>.RemoveLast value is a mistake in your benchmarking code.

Removing or getting the last item of a doubly linked list is cheap. In the case of LinkedList<T> that means that RemoveLast and Last are cheap.

But you weren't using the Last property, but LINQ's extension method Last(). On collections that don't implement IList<T> it iterates the whole list, giving it O(n) runtime.

Tome answered 3/11, 2012 at 17:11 Comment(3)
Oh you're right, changing to LinkedList<T>.Last.Value is much better!Kanara
+1 Didn't check the code and didn't know enough about LinkedList<T> find the result suspicious.Boche
I finally decided to accept this as you are the only one who found my bugged code in LinkedList<T>.Kanara
B
13

List<T> is a dynamic over-allocating array (a data structure you'll also see in many other languages' standard library). This means it internally uses of a "static" array (an array that can't be resized, known as just "array" in .NET) which may be and often is larger than the size of the list. Appending then simply increments a counter and uses the next, previously unused, slot of the internal array. The array is only re-allocated (which requires copying all elements) if the internal array becomes to small to accommodate all items. When that happens, the size of the array is increased by a factors (not a constant), usually 2.

This ensures that amortized time complexity (basically, the average time per operation over a long sequence of operations) for appending is O(1) even in the worst case. For adding at the front, no such optimization is feasible (at least not while keeping both random access and O(1) appending at the end). It always has to copy all elements to move them into their new slots (making space for the added element in the first slot). Stack<T> does the same thing, you just don't notice the discrepancy with adding to the front because you only ever operate on one end (the fast one).

Getting the end of a linked list depends a lot on the internals of your list. One can maintain a reference to the last element, but this makes all operations on the list more complicated, and may (I don't have an example at hand) make some operations much more expensive. Lacking such a reference, appending to the end requires walking through all elements of the linked list to find the last node, which is of course awfully slow for lists of nontrivial size.

As pointed out by @CodesInChaos, your linked list manipulation was flawed. The fast retrieval of the end you see now is most likely caused by LinkedList<T> explicitly maintaining a reference to the last node, as mentioned above. Note that getting an element not at either end is still slow.

Boche answered 3/11, 2012 at 17:6 Comment(3)
"For adding at the front, no such optimization is feasible." I suppose you could put the List elements in the middle of the "static" array, and keep an offset from the front, instead of using a 0 offset. Then you could add items to the list in constant time by using (offset-1) position and then decrementing the offset. I guess it's not worth wasting the memory for such a special case though.Bardwell
@smackfu I suppose that would work, but I'm not sure how this interacts with doing the same thing on the end, and whether it ends up "just" saving a large constant factor or if it would improve the amortized complexity. And yes, this would be quite a special case. A deque is better for operations on both ends.Boche
@smackfu Even than inserting in the middle would still be O(n). One could do case by case optimizations but that would complicate things very fast. But there is something along that line, a tree list where elements are stored as leafs in self-balancing tree. It's slightly slower on random access than array based list but has O(log(n)) complexity for inserting and removing (and accessing) element at random index. I've found the implementation for Java but haven't had luck with .Net solution.Hoffert
B
6

The speed comes essentially from the number of operations needed to insert, delete, or search an item. You already noticed, that list needs memory transfers.

Stack is a list, that is accessible only at the top element -- and the computer always knows, where it is.

The linked list is another thing: the start of the list is known, thus it's very fast to add or remove from the start -- but finding the last element takes time. Caching the location of the last element OTOH is only worthwhile for addition. For deletion one needs to traverse the complete list minus one element to find the 'hook' or pointer to the last one.

Just looking at the numbers, one can make some educated guesses of the internals of each data structure:

  • pop from a stack is fast, as expected
  • push to stack is slower. and it's slower than adding to the end of the list. Why?
    • apparently the allocation unit size for stack is smaller -- it may only increase the stack size by 100, while growing the list could be done in units of 1000.
  • A list seems to be a static array. Accessing the list at the front requires memory transfers, that take time in proportion to the list length.
  • Basic linked list operations shouldn't take that much longer, it's generally only required to
    • new_item.next = list_start; list_start = new_item; // to add
    • list_start = list_start.next; // to remove
    • however, as addLast is so fast, it means that also when adding or deleting to a linked list, one has to update the pointer to the last element also. So there's extra bookkeeping.
  • Doubly linked lists OTOH make it relatively fast to insert and delete at both ends of the list (I've been informed that a better code uses DLLs), however,
    • links to previous and next item also double the work for the bookkeeping
Breadstuff answered 3/11, 2012 at 17:3 Comment(2)
(1) Reallocation for both List and Stack is (most likely) done by multiplying the size by a small constant, not by adding a constant. Adding a constant is inflexible: It causes more re-allocations for larger collections and wastes more space for smaller collections. (2) Your pseudocode for linked lists assumes a singly linked list, but it's a doubly-linked one.Boche
@delnan: Yes, to be precise. Some strategies cap the increase factor, some use fibonacci series. I still have to assume, that the strategies differ for stacks and queues -- perhaps on an assumption that queues are used for "large" data and stacks for "small" data and that the design choice in stacks is made towards minimizing unused memory.Breadstuff
N
1

I have a Java background and I guess your question relates more to general datastructures than a specific language. Also, I apologize if my statements are incorrect.

1. the similarity in performance of using Stack and the end of List

2. the differences in using the front and the end of List, and

At least in Java, Stacks are implemented using arrays (Apologies if that is not the case with C#. You could refer to the source for the implementation) And same is the case of Lists. Typical with an array, all insertions at the end takes lesser time than at the beginning because the pre-existing values in the array needs to be moved down to accommodate the insertion at the beginning.

Link to Stack.java source and its superclass Vector

3. the reason that using the end of LinkedList is so slow?

LinkedList do not allow random access and have to traverse through the nodes before reaching your insertion point. If you find that the performance is slower for the last nodes, then I suppose the LinkedList implementation should be a singly-linked list. I guess you would want to consider a doubly-linked-list for optimal performance while accessing elements at the end.

http://en.wikipedia.org/wiki/Linked_list

Nazareth answered 3/11, 2012 at 17:3 Comment(0)
A
1

the similarity in performance of using Stack and the end of List,

As explained by delnan, they both use a simple array internally, so they behave very similar when working at the end. You could see a stack being a list with just access to the last object.

the differences in using the front and the end of List

You already suspected it correctly. Manipulating the beginning of a list, means that the underlying array needs to change. Adding an item usually means that you need to shift all other elements by one, same with removing. If you know that you will be manipulating both ends of a list, you’re better of using a linked list.

the reason that using the end of LinkedList is so slow?

Usually, element insertion and deletion for linked lists at any position can be done in constant time, as you just need to change at most two pointers. The problem is just getting to the position. A normal linked list has just a pointer to its first element. So if you want to get to the last element, you need to iterate through all elements. A queue implemented with a linked list usually solves this problem by having an additional pointer to the last element, so adding elements is possible in constant time as well. The more sophisticated data structure would be a double linked list that has both pointers to the first and last element, and where each element also contains a pointer to the next and previous element.

What you should learn about this is that there are many different data structures that are made for a single purpose, which they can handle very efficiently. Choosing the correct structure depends a lot on what you want to do.

Artieartifact answered 3/11, 2012 at 17:24 Comment(1)
LinkedList is a doubly-linked list. The original posting had an error in it.Conjoin
H
0

Just improved some of the deficiencies of the previous code, especially the influence of Random and the dummy calculations. Array still tops everything, but the performance of List is impressing and LinkedList is very good for random insertions.

The sorted results are:

12      array[i]
40      list2[i]
62      FillArray
68      list2.RemoveAt
78      stack.Pop
126     list2.Add
127     queue.Dequeue
159     stack.Push
161     foreach_linkedlist1
191     queue.Enqueue
218     linkedlist1.RemoveFirst
219     linkedlist2.RemoveLast
2470        linkedlist2.AddLast
2940        linkedlist1.AddFirst

The code is:

using System;
using System.Collections.Generic;
using System.Diagnostics;
//
namespace Benchmarking {
    //
    static class Collections {
        //
        public static void Main() {
            const int limit = 9000000;
            Stopwatch sw = new Stopwatch();
            Stack<int> stack = new Stack<int>();
            Queue<int> queue = new Queue<int>();
            List<int> list1 = new List<int>();
            List<int> list2 = new List<int>();
            LinkedList<int> linkedlist1 = new LinkedList<int>();
            LinkedList<int> linkedlist2 = new LinkedList<int>();
            int dummy;

            sw.Reset();
            Console.Write( "{0,40}  ", "stack.Push");
            sw.Start();
            for ( int i = 0; i < limit; i++ ) {
                stack.Push( i );
            }
            sw.Stop();
            Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
            sw.Reset();
            Console.Write( "{0,40}  ", "stack.Pop" );
            sw.Start();
            for ( int i = 0; i < limit; i++ ) {
                stack.Pop();
            }
            sw.Stop();
            Console.WriteLine( sw.ElapsedMilliseconds.ToString() );


            sw.Reset();
            Console.Write( "{0,40}  ", "queue.Enqueue" );
            sw.Start();
            for ( int i = 0; i < limit; i++ ) {
                queue.Enqueue( i );
            }
            sw.Stop();
            Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
            sw.Reset();
            Console.Write( "{0,40}  ", "queue.Dequeue" );
            sw.Start();
            for ( int i = 0; i < limit; i++ ) {
                queue.Dequeue();
            }
            sw.Stop();
            Console.WriteLine( sw.ElapsedMilliseconds.ToString() );

            //sw.Reset();
            //Console.Write( "{0,40}  ", "Insert to List at the front..." );
            //sw.Start();
            //for ( int i = 0; i < limit; i++ ) {
            //  list1.Insert( 0, i );
            //}
            //sw.Stop();
            //Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
            //
            //sw.Reset();
            //Console.Write( "{0,40}  ", "RemoveAt from List at the front..." );
            //sw.Start();
            //for ( int i = 0; i < limit; i++ ) {
            //  dummy = list1[ 0 ];
            //  list1.RemoveAt( 0 );
            //  dummy++;
            //}
            //sw.Stop();
            //Console.WriteLine( sw.ElapsedMilliseconds.ToString() );

            sw.Reset();
            Console.Write( "{0,40}  ", "list2.Add" );
            sw.Start();
            for ( int i = 0; i < limit; i++ ) {
                list2.Add( i );
            }
            sw.Stop();
            Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
            sw.Reset();
            Console.Write( "{0,40}  ", "list2.RemoveAt" );
            sw.Start();
            for ( int i = 0; i < limit; i++ ) {
                list2.RemoveAt( list2.Count - 1 );
            }
            sw.Stop();
            Console.WriteLine( sw.ElapsedMilliseconds.ToString() );


            sw.Reset();
            Console.Write( "{0,40}  ", "linkedlist1.AddFirst" );
            sw.Start();
            for ( int i = 0; i < limit; i++ ) {
                linkedlist1.AddFirst( i );
            }
            sw.Stop();
            Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
            sw.Reset();
            Console.Write( "{0,40}  ", "linkedlist1.RemoveFirst" );
            sw.Start();
            for ( int i = 0; i < limit; i++ ) {
                linkedlist1.RemoveFirst();
            }
            sw.Stop();
            Console.WriteLine( sw.ElapsedMilliseconds.ToString() );


            sw.Reset();
            Console.Write( "{0,40}  ", "linkedlist2.AddLast" );
            sw.Start();
            for ( int i = 0; i < limit; i++ ) {
                linkedlist2.AddLast( i );
            }
            sw.Stop();
            Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
            sw.Reset();
            Console.Write( "{0,40}  ", "linkedlist2.RemoveLast" );
            sw.Start();
            for ( int i = 0; i < limit; i++ ) {
                linkedlist2.RemoveLast();
            }
            sw.Stop();
            Console.WriteLine( sw.ElapsedMilliseconds.ToString() );


            // Fill again
            for ( int i = 0; i < limit; i++ ) {
                list2.Add( i );
            }
            sw.Reset();
            Console.Write( "{0,40}  ", "list2[i]" );
            sw.Start();
            for ( int i = 0; i < limit; i++ ) {
                dummy = list2[ i ];
            }
            sw.Stop();
            Console.WriteLine( sw.ElapsedMilliseconds.ToString() );


            // Fill array
            sw.Reset();
            Console.Write( "{0,40}  ", "FillArray" );
            sw.Start();
            var array = new int[ limit ];
            for ( int i = 0; i < limit; i++ ) {
                array[ i ] = i;
            }
            sw.Stop();
            Console.WriteLine( sw.ElapsedMilliseconds.ToString() );

            sw.Reset();
            Console.Write( "{0,40}  ", "array[i]" );
            sw.Start();
            for ( int i = 0; i < limit; i++ ) {
                dummy = array[ i ];
            }
            sw.Stop();
            Console.WriteLine( sw.ElapsedMilliseconds.ToString() );


            // Fill again
            for ( int i = 0; i < limit; i++ ) {
                linkedlist1.AddFirst( i );
            }
            sw.Reset();
            Console.Write( "{0,40}  ", "foreach_linkedlist1" );
            sw.Start();
            foreach ( var item in linkedlist1 ) {
                dummy = item;
            }
            sw.Stop();
            Console.WriteLine( sw.ElapsedMilliseconds.ToString() );

            //
            Console.WriteLine( "Press Enter to end." );
            Console.ReadLine();
        }
    }
}
Hsining answered 6/8, 2018 at 7:37 Comment(0)
R
0

I'm coming in very late to this one but there is a BoundedChannel that worked out great for me for keeping a sliding windowed buffer.

https://learn.microsoft.com/en-us/dotnet/core/extensions/channels

Reedy answered 9/3, 2023 at 15:1 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.