# -*- coding: utf-8 -*-
"""
Created on Wed Dec 30 16:05:47 2015
@author: M Wieser
LICENCE
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
"""
import numpy as np
from scipy.sparse.linalg import spsolve
from scipy.linalg import cholesky
# from scikits.sparse.cholmod import cholesky
from scipy import sparse
from scipy.stats import norm
import matplotlib.pyplot as pl


def als(y, lam=1e6, p=0.1, itermax=10):
r"""
Implements an Asymmetric Least Squares Smoothing
baseline correction algorithm (P. Eilers, H. Boelens 2005)
Baseline Correction with Asymmetric Least Squares Smoothing
based on https://github.com/vicngtor/BaySpecPlots
Baseline Correction with Asymmetric Least Squares Smoothing
Paul H. C. Eilers and Hans F.M. Boelens
October 21, 2005
Description from the original documentation:
Most baseline problems in instrumental methods are characterized by a smooth
baseline and a superimposed signal that carries the analytical information: a series
of peaks that are either all positive or all negative. We combine a smoother
with asymmetric weighting of deviations from the (smooth) trend get an effective
baseline estimator. It is easy to use, fast and keeps the analytical peak signal intact.
No prior information about peak shapes or baseline (polynomial) is needed
by the method. The performance is illustrated by simulation and applications to
real data.
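
    In each pass, the loop below minimizes the penalized least squares objective
    (second-order difference penalty; this is a compact restatement of what the
    code computes):

    .. math::

        S = \sum_i w_i (y_i - z_i)^2 + \lambda \sum_i (\Delta^2 z_i)^2

    with :math:`w_i = p` if :math:`y_i > z_i` and :math:`w_i = 1 - p` otherwise.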
    Inputs:
        y:
            input data (i.e. chromatogram or spectrum)
        lam:
            parameter that can be adjusted by the user. The larger lambda is,
            the smoother the resulting background, z
        p:
            weighting of deviations. 0.5 = symmetric, <0.5: negative
            deviations are suppressed more strongly
        itermax:
            number of iterations to perform
    Output:
        the fitted background vector
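
    Example (a minimal usage sketch; ``y`` is assumed to be a 1-D numpy array
    holding the measured spectrum, and the file name is hypothetical)::

        import numpy as np
        y = np.loadtxt('spectrum.txt')     # hypothetical input file
        baseline = als(y, lam=1e6, p=0.1)
        corrected = y - baseline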
"""
    L = len(y)
    # D = sparse.csc_matrix(np.diff(np.eye(L), 2))
    D = sparse.eye(L, format='csc')
    D = D[1:] - D[:-1]  # numpy.diff( ,2) does not work with a sparse matrix; this is a workaround.
    D = D[1:] - D[:-1]
    D = D.T
    w = np.ones(L)
    for i in range(itermax):
        W = sparse.diags(w, 0, shape=(L, L))
        Z = W + lam * D.dot(D.T)
        z = spsolve(Z, w * y)
        w = p * (y > z) + (1 - p) * (y < z)
    return z


def arpls(y, lam=1e4, ratio=0.05, itermax=100):
r"""
Baseline correction using asymmetrically
reweighted penalized least squares smoothing
Sung-June Baek, Aaron Park, Young-Jin Ahna and Jaebum Choo,
Analyst, 2015, 140, 250 (2015)
Abstract
Baseline correction methods based on penalized least squares are successfully
applied to various spectral analyses. The methods change the weights iteratively
by estimating a baseline. If a signal is below a previously fitted baseline,
large weight is given. On the other hand, no weight or small weight is given
when a signal is above a fitted baseline as it could be assumed to be a part
of the peak. As noise is distributed above the baseline as well as below the
baseline, however, it is desirable to give the same or similar weights in
either case. For the purpose, we propose a new weighting scheme based on the
generalized logistic function. The proposed method estimates the noise level
iteratively and adjusts the weights correspondingly. According to the
experimental results with simulated spectra and measured Raman spectra, the
proposed method outperforms the existing methods for baseline correction and
peak height estimation.
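
    In each iteration the weights are updated with the generalized logistic
    function used in the loop below, where d = y - z and m, s are the mean and
    standard deviation of the negative part of d:

    .. math::

        w_i = \frac{1}{1 + \exp\left( 2 (d_i - (2 s - m)) / s \right)}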
    Inputs:
        y:
            input data (i.e. chromatogram or spectrum)
        lam:
            parameter that can be adjusted by the user. The larger lambda is,
            the smoother the resulting background, z
        ratio:
            convergence criterion, 0 < ratio < 1: iteration stops once the
            relative change of the weight vector drops below this value
        itermax:
            maximum number of iterations to perform
    Output:
        the fitted background vector
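
    Example (a minimal usage sketch; ``y`` is assumed to be a 1-D numpy array)::

        baseline = arpls(y, lam=1e5, ratio=0.05)
        corrected = y - baseline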
"""
    N = len(y)
    # D = sparse.csc_matrix(np.diff(np.eye(N), 2))
    D = sparse.eye(N, format='csc')
    D = D[1:] - D[:-1]  # numpy.diff( ,2) does not work with a sparse matrix; this is a workaround.
    D = D[1:] - D[:-1]
    H = lam * D.T * D
    w = np.ones(N)
    for i in range(itermax):
        W = sparse.diags(w, 0, shape=(N, N))
        WH = sparse.csc_matrix(W + H)
        # upper Cholesky factor C of (W + H), so that (W + H) = C.T * C
        C = sparse.csc_matrix(cholesky(WH.todense()))
        z = spsolve(C, spsolve(C.T, w * y))  # two triangular solves instead of one full solve
        d = y - z
        dn = d[d < 0]
        m = np.mean(dn)
        s = np.std(dn)
        # generalized logistic weighting of the residuals (arPLS weight update)
        wt = 1. / (1 + np.exp(2 * (d - (2 * s - m)) / s))
        # stop once the weights no longer change significantly
        if np.linalg.norm(w - wt) / np.linalg.norm(w) < ratio:
            break
        w = wt
    return z


def WhittakerSmooth(x, w, lam, differences=1):
    '''
    Penalized least squares algorithm for background fitting

    input
        x:
            input data (i.e. chromatogram or spectrum)
        w:
            binary mask (value of the mask is zero if a point belongs to a peak
            and one otherwise)
        lam:
            parameter that can be adjusted by the user. The larger lambda is, the
            smoother the resulting background
        differences:
            integer indicating the order of the difference penalty
    output:
        the fitted background vector
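
    The background is the solution of the sparse linear system built below,
    (W + lam * D.T * D) z = W x, where D is the difference matrix of the
    requested order and W = diag(w).

    Example (a minimal usage sketch; x is assumed to be a 1-D numpy array)::

        w = np.ones(len(x))  # weight every point equally, i.e. no peak mask
        z = WhittakerSmooth(x, w, lam=100, differences=1)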
    '''
    X = np.matrix(x)
    m = X.size
    # D = csc_matrix(np.diff(np.eye(m), differences))
    D = sparse.eye(m, format='csc')
    for i in range(differences):
        D = D[1:] - D[:-1]  # numpy.diff() does not work with a sparse matrix; this is a workaround.
    W = sparse.diags(w, 0, shape=(m, m))
    A = sparse.csc_matrix(W + (lam * D.T * D))
    B = sparse.csc_matrix(W * X.T)
    background = spsolve(A, B)
    return np.array(background)


def airpls(x, lam=100, porder=1, itermax=100):
    '''
    airpls.py Copyright 2014 Renato Lombardo - renato.lombardo@unipa.it
    Baseline correction using adaptive iteratively reweighted penalized least squares

    This program is a translation in Python of the R source code of airPLS version 2.0
    by Yizeng Liang and Zhang Zhimin - https://code.google.com/p/airpls

    Reference:
    Z.-M. Zhang, S. Chen, and Y.-Z. Liang, Baseline correction using adaptive
    iteratively reweighted penalized least squares. Analyst 135 (5), 1138-1146 (2010).

    Description from the original documentation:

    Baseline drift always blurs or even swamps signals and deteriorates analytical
    results, particularly in multivariate analysis. It is necessary to correct
    baseline drift to perform further data analysis. Simple or modified polynomial
    fitting has been found to be effective to some extent. However, this method
    requires user intervention and is prone to variability, especially in low
    signal-to-noise ratio environments. The proposed adaptive iteratively
    reweighted Penalized Least Squares (airPLS) algorithm doesn't require any
    user intervention or prior information, such as detected peaks. It
    iteratively changes the weights of the sum of squared errors (SSE) between the fitted
    baseline and the original signal, and the weights are obtained adaptively from the
    difference between the previously fitted baseline and the original signal. This baseline
    estimator is general, fast and flexible in fitting baselines.
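
    At iteration i the loop below rebuilds the weights from the residual
    d = x - z, with dssn the absolute sum of the negative residuals:

        w_k = 0                        if d_k >= 0   (point treated as peak)
        w_k = exp(i * |d_k| / dssn)    if d_k <  0

    (the first and last weights are additionally set to exp(i * max(d[d < 0]) / dssn))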
    Adaptive iteratively reweighted penalized least squares for baseline fitting

    input
        x:
            input data (i.e. chromatogram or spectrum)
        lam:
            parameter that can be adjusted by the user. The larger lambda is,
            the smoother the resulting background, z
        porder:
            integer indicating the order of the difference penalty
        itermax:
            maximum number of iterations to perform
    output:
        the fitted background vector
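
    Example (a minimal usage sketch; ``x`` is assumed to be a 1-D numpy array)::

        baseline = airpls(x, lam=100, porder=1)
        corrected = x - baseline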
    '''
    m = x.shape[0]
    w = np.ones(m)
    for i in range(1, itermax + 1):
        z = WhittakerSmooth(x, w, lam, porder)
        d = x - z
        dssn = np.abs(d[d < 0].sum())
        if (dssn < 0.001 * (abs(x)).sum() or i == itermax):
            if (i == itermax):
                print('airpls: max iteration reached!')
            break
        w[d >= 0] = 0  # d >= 0 means the point is part of a peak, so its weight is set to 0 in order to ignore it
        w[d < 0] = np.exp(i * np.abs(d[d < 0]) / dssn)
        w[0] = np.exp(i * (d[d < 0]).max() / dssn)
        w[-1] = w[0]
    return z


if __name__ == '__main__':
    '''
    Example usage and testing
    '''
    print('Testing...')
    x = np.arange(0, 1000, 1)
    g1 = norm(loc=100, scale=10.0)  # generate three Gaussians as the signal
    g2 = norm(loc=300, scale=30.0)
    g3 = norm(loc=750, scale=5.0)
    signal = g1.pdf(x) + g2.pdf(x) + g3.pdf(x)
    baseline1 = 5e-4 * x + 0.2  # linear baseline
    baseline2 = 0.2 * np.sin(np.pi * x / x.max())  # sinusoidal baseline
    noise = np.random.poisson(100 * baseline2, size=x.shape[0]) / 800
    print('Generating simulated experiment')
    y1 = signal + baseline1 + noise
    y2 = signal + baseline2 + noise
    print('Removing baselines')
    c1 = y1 - airpls(y1)            # corrected values with baseline removed (airPLS)
    c2 = y1 - arpls(y1, 1e7, 0.1)   # corrected values with baseline removed (arPLS)
    c3 = y2 - airpls(y2)            # corrected values with baseline removed (airPLS)
    c4 = y2 - arpls(y2, 1e7, 0.01)  # corrected values with baseline removed (arPLS)
    print('Plotting results')
    fig, ax = pl.subplots(nrows=2, ncols=1)
    ax[0].plot(x, y1, '-k')
    ax[0].plot(x, c1, '-r')
    ax[0].plot(x, c2, '-b')
    ax[0].set_title('Linear baseline')
    ax[1].plot(x, y2, '-k')
    ax[1].plot(x, c3, '-r')
    ax[1].plot(x, c4, '-b')
    ax[1].set_title('Sinusoidal baseline')
    pl.show()
    print('Done!')