Splitting objects into buckets using PowerShell

Today I needed to quickly split users into migration batches for Office 365, which meant creating many buckets of 100 user objects each. The following script can help you out if you need something similar.

function ConvertTo-Buckets
{
    <#
    .SYNOPSIS
    Groups pipeline input into buckets (one ArrayList per bucket).

    .DESCRIPTION
    Works in one of two modes, selected by the parameter that is supplied:

    -NumberOfBuckets  Creates exactly that many buckets up front and deals the
                      incoming objects out round-robin, starting with the first
                      bucket. Buckets that receive no objects are still returned.
    -BucketSize       Fills one bucket at a time, in input order, and opens a new
                      bucket only when another object arrives after the current
                      one is full. No empty buckets are returned, so empty input
                      produces no output.

    .PARAMETER NumberOfBuckets
    Total number of buckets (2-9999). The bucket sizes differ by at most one.

    .PARAMETER BucketSize
    Maximum number of objects per bucket (2-99999). Only the last bucket can
    hold fewer objects than this.

    .PARAMETER InputObject
    Object to place into a bucket. Normally supplied through the pipeline; a
    value passed by name is added as one single item.

    .OUTPUTS
    System.Collections.ArrayList - one per bucket. Wrap the call in @( ) when
    assigning the result, otherwise a single returned bucket is stored directly
    in the variable instead of in an array of buckets.

    .EXAMPLE
    $buckets = @(1..10 | ConvertTo-Buckets -BucketSize 4)   # 1-4, 5-8, 9-10
    #>
    [CmdletBinding()]
    [OutputType([System.Collections.ArrayList])]
    Param
    (
        # Total number of buckets; the bucket sizes differ by at most one
        [Parameter(Mandatory=$true,
                    Position=0,
                    ParameterSetName="NumberOfBuckets")]
        [ValidateRange(2, 9999)]
        [int] $NumberOfBuckets,

        # Maximum size of each bucket; only the last bucket can be smaller
        [Parameter(Mandatory=$true,
                    Position=1,
                    ParameterSetName="BucketSize")]
        [ValidateRange(2, 99999)]
        [int]  $BucketSize,

        # Input object to put into a bucket
        [Parameter(Mandatory=$true,
                    Position=2, ValueFromPipeline=$true)]
        $InputObject
    )

    Begin
    {
        # The two modes are mutually exclusive parameter sets, so the presence
        # of -NumberOfBuckets decides which one is active.
        $distributeRoundRobin = $PSBoundParameters.ContainsKey('NumberOfBuckets')

        # Outer list holds the buckets; every bucket is itself an ArrayList.
        $Buckets = New-Object System.Collections.ArrayList
        if ($distributeRoundRobin) {
            # The number of buckets is fixed, so create all of them up front.
            for ($i = 0; $i -lt $NumberOfBuckets; $i++) {
                [void] $Buckets.Add((New-Object System.Collections.ArrayList))
            }
        }

        # Round-robin mode only: bucket that receives the next object.
        $index = 0
    }
    Process
    {
        if ($distributeRoundRobin) {
            # Add first, then advance, so the very first object lands in bucket 0.
            [void] $Buckets[$index].Add($InputObject)
            $index = ($index + 1) % $NumberOfBuckets
        } else {
            # Open a new bucket lazily - only when there is an object to put in
            # it - so a full last bucket is never followed by an empty one.
            if ($Buckets.Count -eq 0 -or $Buckets[$Buckets.Count - 1].Count -ge $BucketSize) {
                [void] $Buckets.Add((New-Object System.Collections.ArrayList))
            }
            [void] $Buckets[$Buckets.Count - 1].Add($InputObject)
        }
    }
    End
    {
        # The outer list is enumerated on output: each bucket is emitted as one object.
        $Buckets
    }
}


# Example 1 - Spread the entries of C:\Windows over 5 buckets and print each bucket
$buc = Get-ChildItem C:\Windows | ConvertTo-Buckets -NumberOfBuckets 5
$inc = 1
foreach ($bucket in $buc) {
    # Red header with the running bucket number, then the bucket contents in white
    Write-Host "Bucket $($inc):" -ForegroundColor Red
    $inc++

    $listing = $bucket | Out-String
    Write-Host $listing -ForegroundColor White
}

# Example 2 - Split into buckets of size 15
# @( ) keeps the result an array of buckets even when only one bucket comes back.
# Without it, a single bucket would be stored directly in $buc and the loop below
# would walk the bucket's items instead of the bucket itself.
$buc = @(Get-ChildItem C:\Windows | ConvertTo-Buckets -BucketSize 15)
$buc | ForEach-Object -Begin {$inc = 1} -Process {
    # Red header with the running bucket number, then the bucket contents in white
    Write-Host -ForegroundColor Red "Bucket $($inc):"
    $inc++

    $_ | Out-String | Write-Host -ForegroundColor White
}

# Example 3 - Split into buckets of size 15, and export each bucket to its own csv file
# The target folder is built from $HOME rather than "~", because "~" is resolved
# against the home of the current location's provider and so only works while
# the current location is on a file system drive.
$folder = Join-Path (Join-Path $HOME 'Downloads') ([guid]::NewGuid())
Write-Verbose "Creating folder $folder" -Verbose
New-Item -ItemType Directory -Path $folder | Out-Null
# @( ) keeps the result an array of buckets even when only one bucket comes back;
# otherwise the loop below would walk that bucket's items and write one file per item.
$buc = @(Get-ChildItem C:\Windows | ConvertTo-Buckets -BucketSize 15)
$buc | ForEach-Object -Begin {$inc = 1} -Process {
    # Skip empty buckets so no empty csv file is written (and no file number is used up).
    if ($_.Count -eq 0) { return }

    $file = Join-Path $folder "$inc.csv"
    $inc++
    Write-Verbose "Creating file $file" -Verbose

    $_ | Select-Object Name, Extension | Export-Csv -Path $file -UseCulture -NoTypeInformation
}
# Open the folder so the generated files can be inspected
Invoke-Item $folder

# Example 4 - Get all email enabled users from AD that are not yet migrated to Office 365,
# and create migration batches of 100 users per batch (one csv file per batch)
# The target folder is built from $HOME rather than "~", because "~" is resolved
# against the home of the current location's provider and so only works while
# the current location is on a file system drive.
$folder = Join-Path (Join-Path $HOME 'Downloads') ([guid]::NewGuid())
Write-Verbose "Creating folder $folder" -Verbose
New-Item -ItemType Directory -Path $folder | Out-Null
# The filter selects users that have no targetaddress set but do have a mail value containing "@".
# It is passed as a string, which is the type the -Filter parameter takes.
# @( ) keeps the result an array of buckets even when fewer than 100 users come back;
# otherwise the loop below would walk the single bucket's users and write one file per user.
$buc = @(Get-ADUser -Filter 'targetaddress -notlike "*" -and mail -like "*@*"' -Properties mail | ConvertTo-Buckets -BucketSize 100)
$buc | ForEach-Object -Begin {$inc = 1} -Process {
    # Skip empty buckets so no empty csv file is written (and no file number is used up).
    if ($_.Count -eq 0) { return }

    $file = Join-Path $folder "$inc.csv"
    $inc++
    Write-Verbose "Creating file $file" -Verbose

    # Each file gets a single EmailAddress column filled from the user's mail attribute
    $_ | Select-Object @{Label="EmailAddress";Expression={$_.mail}} | Export-Csv -Path $file -UseCulture -NoTypeInformation
}
# Open the folder so the generated files can be inspected
Invoke-Item $folder

One thought on “Splitting objects into buckets using PowerShell”

Leave a reply to Kovi Cancel reply