3. Notebook for data preparation

## BUILT ON THE DEUS VULT VERSION; DISCARD IF IN DOUBT ##

%matplotlib inline
from pylab import rcParams

import warnings

import os, os.path
import pandas as pd, numpy as np, scipy as sp
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

import cufflinks as cf, matplotlib as mp
cf.go_offline()
from matplotlib.dates import DateFormatter 
import matplotlib.dates as mdates
import matplotlib.cm as cm
from matplotlib import dates
from matplotlib.ticker import AutoMinorLocator, LinearLocator, AutoLocator

import plotly.graph_objects as go
from plotly.subplots import make_subplots

from scipy.signal import argrelextrema, find_peaks
from functools import wraps

import ipywidgets as widgets
from ipywidgets import interact, interact_manual, fixed
plt.style.use('fivethirtyeight')
mp.rcParams['font.size'] = 22
mp.rcParams['axes.labelsize'] = 32
mp.rcParams['figure.figsize'] = (15, 10)
R_M = 2440 # km
M_0 = -1.96e02 * R_M**3 # nT. Alexeev, Belenkaya et al 2010 doi:10.1016/j.icarus.2010.01.024
z_displacement = 484 # km, northward offset of the dipole along Z_MSO
resolution = 60
resolutions = [60, 10, 5, 1]
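As a quick consistency check of these constants (an illustrative calculation, not part of the pipeline), the equatorial surface field of a dipole with moment |M_0| is |M_0| / R_M^3, which recovers the roughly 196 nT surface value implied by the Alexeev et al. (2010) moment:

# Illustrative check: dipole equatorial surface field from the constants above
print(abs(M_0) / R_M**3)   # 196.0 nT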

def load_data(resolution=60, year=None, mindoy=None, maxdoy=None):
    if resolution not in resolutions:
        print("Resolution not available, using 60sec averages")
        resolution = 60
    
    data_files = []
    path = os.path.join(r'/home/dp/archive/messenger/full/',"%02d" % resolution)
   
    for file in sorted(os.listdir(path)):
        if not file.endswith(".TAB"):
            continue
        if year is not None and not file.startswith("MAGMSOSCIAVG" + str(year % 2000)):
            continue
        # File names look like MAGMSOSCIAVG<yy><doy>_<res>_Vnn.TAB, with <doy> zero-padded to three digits
        doy = int(file.split("_")[0][-3:], 10)
        if (mindoy is None or doy >= mindoy) and (maxdoy is None or doy <= maxdoy):
            data_files.append(pd.read_table(os.path.join(path, file), delim_whitespace=True, header=None))
        if maxdoy is not None and doy > maxdoy:
            break

    data = pd.concat(data_files, ignore_index=True)
    data.columns = ['YEAR', 'DAY_OF_YEAR', 'HOUR', 'MINUTE', 'SECOND',
        'TIME_TAG', 'NAVG', 'X_MSO', 'Y_MSO', 'Z_MSO', 'BX_MSO',
        'BY_MSO', 'BZ_MSO', 'DBX_MSO', 'DBY_MSO', 'DBZ_MSO']
    data['DATE'] = data.apply(lambda x: 
        datetime.strptime("%d-%03d_%02d:%02d:%02d" % (x.YEAR, x.DAY_OF_YEAR, x.HOUR, x.MINUTE, x.SECOND),
            "%Y-%j_%H:%M:%S"), axis=1)
    data = data.drop(['YEAR', 'DAY_OF_YEAR', 'HOUR', 'MINUTE', 'SECOND', 'NAVG', 'TIME_TAG'], axis=1)
    data = data.set_index('DATE')
    grouped = data.groupby(level=0)
    data = grouped.last()
    return data
@interact(resolution=resolutions, year=(2011, 2015, 1), start_doy=(1, 365, 1), end_doy=(1, 365, 1))
def w_load_data(resolution=resolution, year=2011, start_doy=91, end_doy=120):
    data = load_data(resolution, year, start_doy, end_doy)
    return data
philpott_labels = pd.read_csv("jgra55678-sup-0002-table_si-s01-ts.csv", parse_dates=True, index_col='Timestamp')
# dfs = [load_data(10, yyyy, 1, 365) for yyyy in range(2011, 2016)]
philpott_labels
Boundary number Orbit number X_MSO (km) Y_MSO (km) Z_MSO (km)
Timestamp
2011-03-23 23:47:02.700 1 12 6275.334 8142.117 -4504.138
2011-03-23 23:59:44.400 2 12 5928.638 7831.493 -3564.992
2011-03-24 01:10:00.100 3 12 2258.153 3712.435 2075.611
2011-03-24 01:14:03.200 4 12 1888.531 3238.635 2347.968
2011-03-24 02:14:46.100 5 12 -2025.455 -3617.479 -3305.720
... ... ... ... ... ...
2015-04-30 10:50:18.000 4 4104 2937.147 1285.647 -534.036
2015-04-30 12:20:00.000 5 4104 -7232.388 -3543.218 -1958.539
2015-04-30 12:32:59.000 6 4104 -7672.303 -3871.532 -3002.659
2015-04-30 14:06:17.000 7 4104 -7880.844 -4669.875 -8858.618
2015-04-30 14:43:28.000 8 4104 -7072.820 -4483.617 -10367.245

32532 rows × 5 columns

smallzmp = philpott_labels[
    (philpott_labels["X_MSO (km)"] > 0) &
    (np.abs(philpott_labels["Z_MSO (km)"]) < 814) &
    (np.abs(philpott_labels["Y_MSO (km)"]) < 814) &
    (philpott_labels["Boundary number"].isin([3, 6]))
]
smallzmp
Boundary number Orbit number X_MSO (km) Y_MSO (km) Z_MSO (km)
Timestamp
2011-08-22 09:25:19 6 315 3544.797 795.090 216.710
2011-08-22 21:26:21 6 316 3654.521 693.637 64.889
2011-08-23 09:24:31 6 317 3511.630 491.595 384.190
2011-08-23 21:27:13 6 318 3759.002 427.960 -39.444
2011-08-24 09:26:06 6 319 3673.590 249.131 163.598
... ... ... ... ... ...
2015-02-05 01:39:40 3 3860 3422.122 389.147 -786.500
2015-02-05 09:57:27 3 3861 3362.473 310.119 -572.979
2015-02-06 02:30:27 3 3863 3381.061 121.609 -605.080
2015-02-06 10:46:33 3 3864 3408.521 20.918 -692.386
2015-02-06 19:02:53 3 3865 3419.314 -75.403 -737.280

140 rows × 5 columns

vzs = []

def getspeed(orbit, dt):
    # Returns True when, at time dt on the given orbit, the motion is not strongly
    # vertical (|v_z| does not exceed 2|v_x|); returns None if the orbit file or
    # the timestamp is missing.
    try:
        dfx = pd.read_csv(f"~/archive/messenger/orbits/messenger-{int(orbit):04d}.csv", parse_dates=True, index_col='DATE')
        vx, vy, vz = dfx["X_MSO"].diff()[dt], dfx["Y_MSO"].diff()[dt], dfx["Z_MSO"].diff()[dt]
    except (OSError, KeyError):
        return None
    return np.abs(vx) * 2 >= np.abs(vz)
    
for dt, crossing in smallzmp.iterrows():
    orbit_num = int(crossing["Orbit number"])
    # The locally stored orbit files are numbered with an offset of roughly 10-12
    # relative to the Philpott orbit numbers, so try the candidate offsets in turn.
    for offset in [11, 12, 10]:
        res = getspeed(orbit_num - offset, dt)
        if res is not None:
            break
    if res:
        print(dt, orbit_num)
        vzs.append((dt, orbit_num))
        
print(vzs)
2011-08-22 09:25:19 315
2011-08-23 09:24:31 317
2011-08-24 09:26:06 319
2011-08-24 21:24:07 320
2011-08-25 09:26:14 321
2011-08-25 21:25:51 322
2011-08-26 09:24:13 323
2011-08-26 21:25:56 324
2011-08-27 09:25:58 325
2011-11-18 10:27:48 493
2011-11-18 22:31:00 494
2011-11-20 10:29:12 497
2011-11-20 22:31:25 498
2011-11-21 10:30:50 499
2012-02-14 03:51:22 671
2012-02-14 15:37:47 672
2012-02-15 03:23:34 673
2012-02-15 15:11:15 674
2012-02-16 14:46:07 676
2012-02-17 02:33:20 677
2012-02-17 14:20:12 678
2012-03-18 18:25:35 740
2012-05-12 07:30:08 875
2012-05-12 15:33:44 876
2012-05-12 23:31:15 877
2012-05-13 07:33:21 878
2012-05-13 15:30:56 879
2012-05-13 23:33:09 880
2012-05-14 23:30:58 883
2012-05-15 07:28:37 884
2012-05-15 15:31:56 885
2012-05-15 23:30:01 886
2012-08-07 07:46:56 1136
2012-08-08 23:50:44 1141
2012-08-09 07:48:48 1142
2012-08-10 23:49:20 1147
2012-09-12 15:09:06 1245
2012-11-03 16:09:44 1401
2012-11-07 16:07:39 1413
2012-11-08 00:12:22 1414
2013-03-05 07:51:18 1766
2013-03-08 23:50:27 1777
2013-06-02 16:18:24 2034
2013-08-27 08:43:14 2291
2013-08-28 00:45:21 2293
2013-08-28 08:44:35 2294
2013-08-28 16:45:08 2295
2013-08-29 00:45:31 2296
2013-08-29 16:45:10 2298
2013-08-31 00:46:12 2302
2013-08-31 16:46:11 2304
2013-09-01 00:43:56 2305
2013-09-01 08:45:47 2306
2013-11-22 09:13:24 2552
2013-11-22 17:11:49 2553
2013-11-23 01:10:21 2554
2013-11-23 09:11:17 2555
2013-11-23 17:09:47 2556
2013-11-24 01:11:47 2557
2013-11-24 09:11:07 2558
2013-11-24 17:09:23 2559
2013-11-25 01:10:34 2560
2013-11-25 17:08:28 2562
2013-11-26 01:08:07 2563
2013-11-26 09:11:18 2564
2013-11-27 01:08:50 2566
2013-11-27 09:09:23 2567
2014-02-18 01:45:43 2815
2014-02-19 01:47:19 2818
2014-02-19 09:46:19 2819
2014-02-19 17:48:29 2820
2014-02-20 01:48:11 2821
2014-02-21 01:44:41 2824
2014-02-21 17:46:50 2826
2014-02-22 01:49:40 2827
2014-02-23 17:47:12 2832
2014-05-17 02:27:01 3079
2014-05-18 18:24:22 3084
for z in vzs:
    orbit = int(z[1])
    xoffset = 0
    for offset in [11, 12, 10]:
        df = pd.read_csv(f"~/archive/messenger/orbits/messenger-{int(orbit) - offset:04d}.csv", parse_dates=True, index_col='DATE')
        # A zero-width slice simply tests whether the crossing timestamp exists in this orbit file
        found = len(df[z[0] - timedelta(seconds=0):z[0] + timedelta(seconds=0)])
        if found > 0:
            xoffset = offset
            break
        else:
            print(offset, orbit, 'didnt work')
    df = df[z[0] - timedelta(seconds=300):z[0] + timedelta(seconds=300)]
    df["BABS"] = np.sqrt(np.power(df["BX_MSO"], 2) + np.power(df["BY_MSO"], 2) + np.power(df["BZ_MSO"], 2))
    fig = make_subplots(rows=8, cols=1, subplot_titles=("X MSO", "Y MSO", "Z MSO", "R", "BX MSO", "BY MSO", "BZ MSO", "|B|"))
    fig.add_trace(go.Scatter(x=df.index, y=df['X_MSO']/R_M, mode="lines", name="X"), row=1, col=1);
    fig.add_trace(go.Scatter(x=df.index, y=df['Y_MSO']/R_M, mode="lines", name="Y"), row=2, col=1);
    fig.add_trace(go.Scatter(x=df.index, y=df['Z_MSO']/R_M, mode="lines", name="Z"), row=3, col=1);
    fig.add_trace(go.Scatter(x=df.index, y=df['RHO']/R_M, mode="lines", name="R"), row=4, col=1);
    fig.add_trace(go.Scatter(x=df.index, y=df['BX_MSO'], mode="lines", name="BX"), row=5, col=1);
    fig.add_trace(go.Scatter(x=df.index, y=df['BY_MSO'], mode="lines", name="BY"), row=6, col=1);
    fig.add_trace(go.Scatter(x=df.index, y=df['BZ_MSO'], mode="lines", name="BZ"), row=7, col=1);
    fig.add_trace(go.Scatter(x=df.index, y=df['BABS'], mode="lines", name="|B|"), row=8, col=1);
    
    for i in range(1, 5):
        fig.update_yaxes(title_text="Distance [R_M]", range=[-2, 2], row=i, col=1)
    
    for i in range(5, 9):
        fig.update_yaxes(title_text="Magnetic field [nT]", row=i, col=1)
    
    for i in range(1, 9):
        fig.add_vline(x=z[0], row=i, col=1)
    fig.update_layout(height=1600, width=800, showlegend=False, title=f"MESSENGER orbit {orbit-xoffset}")
    fig.write_image(f"noon_crossing-{orbit-xoffset}-{z[0]}.png")
11 1136 didnt work
11 1141 didnt work
11 1142 didnt work
11 1147 didnt work
11 1245 didnt work
11 1401 didnt work
11 1413 didnt work
11 1414 didnt work
11 1766 didnt work
11 1777 didnt work
11 2034 didnt work
11 2291 didnt work
11 2293 didnt work
11 2294 didnt work
11 2295 didnt work
11 2296 didnt work
11 2298 didnt work
11 2302 didnt work
11 2304 didnt work
11 2305 didnt work
11 2306 didnt work
11 2552 didnt work
11 2553 didnt work
11 2554 didnt work
11 2555 didnt work
11 2556 didnt work
11 2557 didnt work
11 2558 didnt work
11 2559 didnt work
11 2560 didnt work
11 2562 didnt work
11 2563 didnt work
11 2564 didnt work
11 2566 didnt work
11 2567 didnt work
11 2815 didnt work
11 2818 didnt work
11 2819 didnt work
11 2820 didnt work
11 2821 didnt work
11 2824 didnt work
11 2826 didnt work
11 2827 didnt work
11 2832 didnt work
11 3079 didnt work
11 3084 didnt work

MESSENGER data is available in several resolutions, from 1-second to 60-second averages. Most of the calculations below were performed at 60-second resolution for speed; the final results were obtained on 5-second data, which is close to the resolution limit of our analytical model.
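For reference, a sketch of how the two passes could be loaded with the load_data helper above (the year and day-of-year range are just the example interval used later in this notebook):

# Exploratory pass on 60 s averages, final pass on 5 s averages over the same interval
data_coarse = load_data(resolution=60, year=2012, mindoy=201, maxdoy=288)
data_fine = load_data(resolution=5, year=2012, mindoy=201, maxdoy=288)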

Original MESSENGER data contains a number of artifacts.

  1. There are gaps in the data that can corrupt calculated diffs if they are not handled with care (a short sketch of the problem follows below).

  2. There are spikes associated with the execution of checkout commands. These commands apply an artificial signal to the sensor so that sensor functionality and calibration can be verified. The checkouts occur nominally once per week, and the dates are noted in the documentation of the data set in the file MESS_MAGRDR_DS.CAT.

These datapoints are filtered out and replaced with NaN values.
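A minimal sketch of the first problem, with synthetic values rather than MESSENGER data: a diff taken across an unmarked gap mixes samples that are many minutes apart, while blanking the sample at the gap boundary keeps later diffs honest.

# Synthetic example: an 11-minute gap in a nominally 1-minute series
idx = pd.to_datetime(["2012-07-19 00:00", "2012-07-19 00:01", "2012-07-19 00:12"])
b = pd.Series([10.0, 12.0, 55.0], index=idx)
print(b.diff())        # the last diff (43 nT) actually spans 11 minutes
b.iloc[-1] = np.nan    # blanking the sample at the gap boundary removes the spurious jump
print(b.diff())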

xyear, doy0, doy1 = 2012, 201, 288

data = load_data(resolution, xyear, doy0, doy1)

payload_columns = ["BX_MSO", "BY_MSO", "BZ_MSO", "X_MSO", "Y_MSO", "Z_MSO"]


def insert_gaps(data, gap_size):
    # Blank the payload columns of the last sample before every gap so that later
    # diffs do not span the gap. `gap_size` is the nominal sampling cadence in seconds.
    expected_delta = np.timedelta64(gap_size * 10**9, 'ns')
    gap_indices = np.where(np.diff(data.index) > expected_delta)[0]
    for i in gap_indices:
        for c in payload_columns:
            data.iat[i, data.columns.get_loc(c)] = np.nan

insert_gaps(data, resolution)

To retrieve and process the checkout dates, we first run the following shell commands (bash):

wget "https://pds-ppi.igpp.ucla.edu/ditdos/download?id=pds://PPI/MESS-E_V_H_SW-MAG-4-SUMM-CALIBRATED-V1.0/CATALOG/MESS_MAGRDR_DS.CAT" -O MESS_MAGRDR_DS.CAT
sed -i 's/&nbsp;//g' MESS_MAGRDR_DS.CAT
grep -i checkout MESS_MAGRDR_DS.CAT | grep '(' | sed 's/1Apr2005/01Apr2005/g' | sed 's/9Aug2005/09Aug2005/g' | awk '{ print substr($0, 0, 28)}' > checkout.dat
def load_checkout_dates(filename, first, last):
    res = []
    with open(filename) as f:
        for l in f.readlines():
            # Fixed-width slices matching the 28-character lines produced by the shell pipeline above
            start, end = l[0:9], l[-15:-6]
            start = datetime.strptime(start + "-00:00:00", '%d%b%Y-%H:%M:%S')
            end = datetime.strptime(end + "-23:59:59", '%d%b%Y-%H:%M:%S')
            # Keep every checkout interval that overlaps [first, last]
            if end > first and start < last:
                res.append((start, end))
        return res

checkout = load_checkout_dates('checkout.dat', data.index[0], data.index[-1])
def filter_checkouts(data, checkout, outlier_threshold):
    # Within each checkout interval, blank the field components of every sample whose
    # |B| exceeds the threshold (together with its immediate neighbours), and return
    # the |B| values inside the checkout intervals for inspection.
    data = data.copy(True)
    payload_columns = ["BX_MSO", "BY_MSO", "BZ_MSO"]
    for c in checkout:
        window = data.loc[c[0]:c[1]]
        outlier_indices = np.where(window['BX_MSO']**2 +
                                   window['BY_MSO']**2 +
                                   window['BZ_MSO']**2 > outlier_threshold**2)[0]
        for i in outlier_indices:
            # Blank the neighbouring samples as well to remove the flanks of the spike
            for j in window.index[max(i - 1, 0):min(i + 2, len(window.index))]:
                for col in payload_columns:
                    data.at[j, col] = np.nan
    bms = []
    for c in checkout:
        bms.append(np.array(np.sqrt(
            data.loc[c[0] : c[1]]['BX_MSO']**2 +
            data.loc[c[0] : c[1]]['BY_MSO']**2 +
            data.loc[c[0] : c[1]]['BZ_MSO']**2)
        ))
    if len(bms) > 0:
        bm = np.concatenate(bms)
        bm = bm[~np.isnan(bm)]
    else:
        bm = []
    return data, bm
# Threshold manually chosen after careful examination of the resulting plot
data, bm = filter_checkouts(data, checkout, outlier_threshold=550)
@interact(data=fixed(data), checkout=fixed(checkout), outlier_threshold=widgets.FloatSlider(min=100, max=1000, step=10, continuous_update=False))
def w_filter_checkouts(**kwargs):
    _, bm = filter_checkouts(**kwargs)
    plt.hist(bm, 100, log=True);
    plt.xlabel(r'$|B|$');
    plt.ylabel(r'$\log\,N$');
data['R'] = np.sqrt((data.X_MSO**2+data.Y_MSO**2+data.Z_MSO**2))
data['RXY'] = np.sqrt((data.X_MSO**2+data.Y_MSO**2))
#with warnings.catch_warnings(record=True) as w:
#warnings.simplefilter("always")
df_apsis = pd.concat([
    pd.DataFrame(data.R.iloc[argrelextrema(data.R.values, np.less)].rename("Periapsis")),
    pd.DataFrame(data.R.iloc[argrelextrema(data.R.values, np.greater)].rename("Apoapsis"))],
    axis=1, join='outer')
@interact(df_apsis=fixed(df_apsis))
def apsis_plot(df_apsis):
    fig = go.Figure()
    
    lines = {'Periapsis': 'Periapsis altitude', 'Apoapsis': 'Apoapsis altitude'}
    for k in lines.keys():
        line = df_apsis[~np.isnan(df_apsis[k])]
        fig.add_trace(go.Scatter(x=line.index, y=line[k].values,
                    mode='lines+markers',
                    name=lines[k]))
    
    fig.update_layout(
        title=go.layout.Title(
            text="Апсиды орбиты КА MESSENGER ({:%Y-%m-%d} - {:%Y-%m-%d})".format(
                min(df_apsis.index), max(df_apsis.index)
            ), x=0.5, xanchor='center'),
        xaxis=go.layout.XAxis(title=go.layout.xaxis.Title(text="Date")),
        yaxis=go.layout.YAxis(title=go.layout.yaxis.Title(text="Altitude [km]")),
    )
    
    fig.show()
def dipole_field(data, psi=0.0):
    data = data.copy(True)
    x, y, z = data.X_MSO, data.Y_MSO, (data.Z_MSO - z_displacement)

    # Distance of the spacecraft to Mercury's center in km
    data["R_DIPOLE"]=np.sqrt(x**2 + y**2 + z**2)
    
    # hermomagnetic colatitude in radians
    data["THETA_DIPOLE"]=np.arccos(z/data.R_DIPOLE)
    # hermomagnetic latitude in radians
    data["LAMBDA_DIPOLE"]=np.arcsin(z/data.R_DIPOLE)
        
    p = z*np.cos(psi) - x*np.sin(psi)
    Br = M_0/data.R_DIPOLE**5
    data["BX_DIPOLE"]=-Br*(- data.R_DIPOLE**2 *np.sin(psi) - 3.*x*p)
    data["BY_DIPOLE"]= Br*(3.*y*p)
    data["BZ_DIPOLE"]=-Br*(data.R_DIPOLE**2 *np.cos(psi) - 3.*z*p)
    data["BABS_DIPOLE"]=np.abs(M_0)/ data.R_DIPOLE**4 * np.sqrt(3*z**2 + data.R_DIPOLE**2)
    return data
@interact(data=fixed(data), psi=widgets.FloatSlider(min=-np.pi, max=np.pi, step=np.pi/15, continuous_update=False))
def w_dipole_field(**kwargs):
    df_dipole = dipole_field(**kwargs)[100:250]
    
    fig = go.Figure()
    
    lines = {"BX": "$B_x$", "BY": "$B_y$", "BZ": "$B_z$", "BABS": "$|B|$"}
    colors = ['black', 'red', 'green', 'blue']
    for component, color in zip(sorted(lines.keys()), colors):
        field = df_dipole[~np.isnan(df_dipole['{}_DIPOLE'.format(component)])]
        fig.add_trace(go.Scatter(x=field.index, y=field['{}_DIPOLE'.format(component)].values, 
                mode='lines', hoverinfo='x+y',
                name=lines[component], marker_color=color))
    
    fig.update_layout(
        title=go.layout.Title(
            text=r"$\text{{Магнитное поле диполя вдоль орбиты КА MESSENGER ({:%Y-%m-%d} - {:%Y-%m-%d}). }} \psi={}$".format(
                min(df_dipole.index), max(df_dipole.index), kwargs['psi']
            ), x=0.5, xanchor='center'),
        xaxis=go.layout.XAxis(title=go.layout.xaxis.Title(text="Date")),
        yaxis=go.layout.YAxis(title=go.layout.yaxis.Title(text="B [nT]")),
    )
    
    # fig.show()
def load_mercury_horizons(resolution=60, year=None, mindoy=None, maxdoy=None):
    dateparse = lambda x: datetime.strptime(x, '%Y-%b-%dZ%H:%M:%S.0000')
    data = pd.read_table("mercury-pos-min.txt", sep="\t", engine='python', parse_dates=[0], date_parser=dateparse)
    data.columns = ["DATE", "X", "Y", "Z", "VX", "VY", "VZ"]
    data = data.drop_duplicates("DATE")
    data = data.set_index('DATE')
    
    if year is not None:
        data = data[data.index.year == year]
    if mindoy is not None:
        data = data[data.index.dayofyear >= mindoy]
    if maxdoy is not None:
        data = data[data.index.dayofyear <= maxdoy]
    
    if resolution != 60:
        data = data.resample('{}S'.format(resolution)).ffill()
    
    data['VABS'] = np.sqrt(data.VX**2 + data.VY**2 + data.VZ**2)
    data['D'] = np.sqrt(data.X**2 + data.Y**2 + data.Z**2)
    return data.sort_index()


data = pd.concat([data, load_mercury_horizons(resolution, xyear, doy0, doy1)], axis=1, join='inner')
def load_mercury_hgi():
    mercury_hgi = pd.read_table("mercury-pos-hgi.txt", sep=r'\s+', engine='python')
    mercury_hgi["HGI_LON"] = mercury_hgi["HGI_LON"] * np.pi/180
    # Convert heliographic latitude to a polar (colatitude) angle in radians
    mercury_hgi["HGI_LAT"] = (90 - mercury_hgi["HG_LAT"]) * np.pi/180
    mercury_hgi["X"] = mercury_hgi["AU"]*np.cos(mercury_hgi["HGI_LON"])*np.sin(mercury_hgi["HGI_LAT"])
    mercury_hgi["Y"] = mercury_hgi["AU"]*np.sin(mercury_hgi["HGI_LON"])*np.sin(mercury_hgi["HGI_LAT"])
    mercury_hgi["Z"] = mercury_hgi["AU"]*np.cos(mercury_hgi["HGI_LAT"])
    mercury_hgi['DATE'] = mercury_hgi.apply(lambda x: datetime(year=int(x['YYYY']), month=1, day=1) + timedelta(days=(int(x['DOY']) - 1)), axis=1)
    mercury_hgi = mercury_hgi.set_index("DATE")
    return mercury_hgi

# mercury_hgi = load_mercury_hgi()
@interact(date=widgets.SelectionSlider(
    options=list(map(lambda x: x.strftime('%Y-%m-%d'), mercury_hgi.index)),
    continuous_update=False
))
def plot_mercury_position(date):
    orbit = go.Scatter3d(
        # 88 = Hermean year duration
        x=mercury_hgi[:88+1].X, y=mercury_hgi[:88+1].Y, z=mercury_hgi[:88+1].Z, mode='lines', name="Orbit",
        line=dict(
            color='#1f77b4',
            width=4
        )
    )

    sun = go.Scatter3d(x=[0], y=[0], z=[0], mode='markers', name='Sun')
    planet = go.Scatter3d(x=[mercury_hgi.X[date]], y=[mercury_hgi.Y[date]], z=[mercury_hgi.Z[date]], mode='markers', name="Mercury")

    layout = dict(
        width=900,
        height=700,
        autosize=False,
        title='Mercury position and orbit',
        scene=dict(
            camera=dict(
                eye=dict(x=-3, y=2, z=2)
            ),
            aspectratio=dict(x=1, y=1, z=1),
            aspectmode='data',
            xaxis_title='X [a.u.]',
            yaxis_title='Y [a.u.]',
            zaxis_title='Z [a.u.]',
        )
    )
    fig = dict(data=[orbit, sun, planet], layout=layout)
    cf.iplot(fig)
# hgi_x, hgi_y, hgi_z are assumed to hold Mercury's Cartesian HGI coordinates (e.g. the X/Y/Z columns produced by load_mercury_hgi())
ax = plt.subplot()
ax.plot(hgi_x, hgi_y);
ax.scatter(0, 0, color='r', s=240); # Position of the Sun
plt.xlabel("HGI X [AU]")
plt.ylabel("HGI Y [AU]")
ax.set_aspect('equal', adjustable='box');
ax = plt.subplot()
ax.plot(hgi_x, hgi_z)
ax.scatter(0, 0, color='r', s=240); # Position of the Sun
plt.xlabel("HGI X [AU]")
plt.ylabel("HGI Z [AU]")
ax.set_aspect('equal', adjustable='box')
# USE WITH CAUTION #
#backup_mess = data.copy(deep=True)
#backup_merc = mercury_xyz.copy(deep=True)
#print(backup_mess.shape,  backup_merc.shape)
# USE WITH CAUTION #
#data = backup_mess.copy(deep=True)
#mercury_xyz = backup_merc.copy(deep=True)
#print(backup_mess.shape,  backup_merc.shape)
# USE WITH CAUTION #
#del backup_merc
#del backup_mess
def find_extrema(data):
    x, y, z, r, rxy = 'X_MSO', 'Y_MSO', 'Z_MSO', 'R', 'RXY'
    
    periapsis = find_peaks(1/data[r].values[~np.isnan(data[r].values)].round(0))[0]
    
    extrema=pd.DataFrame(index=data.index)
    extrema['TYPE']=0
    
    xy_edges = find_peaks(data[rxy].round(0).values[~np.isnan(data[rxy].values)])[0]
    
    if xy_edges[0] < periapsis[0] and xy_edges[1] > periapsis[0]:
        xy_edges = xy_edges[1:]
    xy1, xy2 = xy_edges[1::2], xy_edges[0::2]
    
    #extrema.iloc[xy_edges] = 1
    extrema.iloc[xy1] = 1
    extrema.iloc[xy2] = -1
    
    cosine_array = pd.DataFrame(index=data.index)
    cosine_array['VALUE'] = np.nan
    
    size = np.min([len(data[x].iloc[xy1].values), len(data[x].iloc[xy2].values)])
    
    dx = np.abs(data[x].iloc[xy1].values[:size] - data[x].iloc[xy2].values[:size])
    dx *= np.sign(data[x].iloc[xy1].values[:size]  - data[x].iloc[xy2].values[:size] )
    dy = data[y].iloc[xy1].values[:size] - data[y].iloc[xy2].values[:size]
    cosine = np.cos(np.arctan2(dy, dx))

    max_delta_cosine = 0.06 # Empirically chosen to cut off rogue data points
    cosine_diff = np.abs(np.diff(np.abs(cosine)))
    cosine[np.append(cosine_diff > max_delta_cosine, False)] = np.nan
    
    i_from, i_to = extrema.iloc[periapsis].index, extrema.iloc[periapsis].index[1:]
    for i in range(len(i_to)):
        cosine_array.loc[i_from[i] : i_to[i], 'VALUE'] = cosine[i]
    cosine_array.loc[i_to[-1]:, 'VALUE'] = cosine[-1]
    return extrema, cosine_array


def show_extrema(data):
    plot_size = 350000
    plt.plot((data.R[:plot_size])/max(data.R[:plot_size]), label=r'Relative distance $R/R_{max}$')
    plt.plot(data.EXTREMA[:plot_size]/2, 'black', label=r'Extrema of $R_{XY}$')
    plt.plot(data.COSALPHA[:plot_size], 'green', label=r'$\cos\,\alpha$')
    plt.xlabel("Date")
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    plt.legend()
    
def show_extrema(data, plot_size=350000):
    fig = go.Figure()
    
    fig.add_trace(go.Scatter(
        x=data.index,
        y=((data.R[:plot_size])/max(data.R[:plot_size])).values, 
        mode='lines', hoverinfo='x+y',
        name=r'$R/R_{max}$', marker={'color':'navy'}
    ))
    
    fig.add_trace(go.Scatter(
        x=data.index,
        y=(data.EXTREMA[:plot_size]/5).T.values[0],
        mode='lines', hoverinfo='x',
        name=r'Extrema of $R_{XY}$', marker={'color':'orange'}
    ))
    
    fig.add_trace(go.Scatter(
        x=data.index,
        y=data.COSALPHA[:plot_size].T.values[0], 
        mode='lines', hoverinfo='x+y',
        name=r'$\cos\,\alpha$', marker={'color':'green'}
    ))
    fig.update_layout(
        title=go.layout.Title(
            text=r"$\text{{Апсиды орбиты КА MESSENGER и фаза орбиты Меркурия ({:%Y-%m-%d} - {:%Y-%m-%d}). }}$".format(
                min(data.index), max(data.index)
            ), x=0.5, xanchor='center'),
        xaxis=go.layout.XAxis(title=go.layout.xaxis.Title(text="Date"))
    )
    
    fig.show()
data['EXTREMA'], data['COSALPHA'] = find_extrema(data)
#show_extrema(data, 24*60*10)
data
X_MSO Y_MSO Z_MSO BX_MSO BY_MSO BZ_MSO DBX_MSO DBY_MSO DBZ_MSO R ... X Y Z VX VY VZ VABS D EXTREMA COSALPHA
DATE
2012-07-19 00:00:00 806.203 4704.321 -1599.129 -8.371 -28.223 -49.810 5.329 4.761 4.302 5033.667938 ... 7.445013e+06 -6.815936e+07 -6.252117e+06 38.655274 7.792708 -2.910057 39.540169 6.884922e+07 0 NaN
2012-07-19 00:01:00 799.885 4745.575 -1737.642 -9.576 -25.721 -48.751 6.955 5.847 3.918 5116.609992 ... 7.447332e+06 -6.815889e+07 -6.252291e+06 38.655093 7.794371 -2.909905 39.540308 6.884903e+07 0 NaN
2012-07-19 00:02:00 793.075 4784.040 -1875.378 -7.972 -28.599 -43.524 7.648 5.359 5.077 5199.331622 ... 7.449651e+06 -6.815842e+07 -6.252466e+06 38.654911 7.796034 -2.909752 39.540447 6.884883e+07 0 NaN
2012-07-19 00:03:00 785.819 4819.807 -2012.057 -15.363 -22.648 -43.007 4.972 5.242 4.021 5281.706579 ... 7.451971e+06 -6.815796e+07 -6.252641e+06 38.654729 7.797697 -2.909600 39.540586 6.884863e+07 0 NaN
2012-07-19 00:04:00 778.143 4852.978 -2147.652 -9.397 -28.434 -42.773 7.009 6.553 4.060 5363.703115 ... 7.454290e+06 -6.815749e+07 -6.252815e+06 38.654547 7.799360 -2.909447 39.540725 6.884844e+07 0 NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2012-10-14 23:55:00 785.287 2623.216 1832.337 -18.145 -132.473 82.594 2.807 5.954 2.825 3294.752910 ... 7.535683e+06 -6.814099e+07 -6.258954e+06 38.648227 7.857560 -2.904085 39.545675 6.884152e+07 0 -0.218838
2012-10-14 23:56:00 806.209 2774.202 1713.205 -13.675 -112.477 89.612 4.001 5.507 1.780 3358.755880 ... 7.538002e+06 -6.814052e+07 -6.259128e+06 38.648043 7.859223 -2.903932 39.545815 6.884133e+07 0 -0.218838
2012-10-14 23:57:00 825.452 2919.384 1590.485 -9.331 -93.291 94.030 3.181 5.492 0.827 3425.465878 ... 7.540321e+06 -6.814005e+07 -6.259302e+06 38.647859 7.860886 -2.903780 39.545954 6.884113e+07 0 -0.218838
2012-10-14 23:58:00 843.077 3058.805 1464.623 -5.617 -76.563 95.951 2.157 4.110 1.455 3494.594023 ... 7.542640e+06 -6.813957e+07 -6.259476e+06 38.647675 7.862549 -2.903627 39.546094 6.884093e+07 0 -0.218838
2012-10-14 23:59:00 859.144 3192.543 1336.037 -3.257 -61.664 96.662 1.478 4.451 1.655 3565.873537 ... 7.544958e+06 -6.813910e+07 -6.259650e+06 38.647491 7.864212 -2.903474 39.546233 6.884074e+07 0 -0.218838

126720 rows × 21 columns

import plotly.graph_objects as go


fig = go.Figure(data=[go.Scatter3d(
    x=data["X_MSO"][:-70000],
    y=data["Y_MSO"][:-70000],
    z=data["Z_MSO"][:-70000],
    mode='lines',
    marker=dict(
        size=1,
        color='blue',                # set color to an array/list of desired values
        colorscale='Viridis',   # choose a colorscale
        opacity=0.8
    )
)])

fig.add_trace(go.Scatter3d(
    x=[0],
    y=[0],
    z=[0],
    mode='markers',
    marker=dict(
        size=30,
        color='yellow',                # set color to an array/list of desired values
        colorscale='Viridis',   # choose a colorscale
        opacity=0.8
    )
))

# tight layout
fig.update_layout(margin=dict(l=0, r=0, b=0, t=0), scene=dict(aspectmode='data'))
fig.show()
def find_nodes(data):
    extrema=pd.DataFrame(index=data.index)
    extrema['TYPE']=0

    apoapsis = find_peaks(data.R.values.round(0))[0]
    periapsis = find_peaks(1/data.R.values.round(0))[0]

    extrema.iloc[apoapsis] = 2
    extrema.iloc[periapsis] = 1
    return extrema, apoapsis, periapsis

nodes, _, _ = find_nodes(data)
def show_periapo(symbol="", first=0, last=None):
    nodes, apoapsis, periapsis = find_nodes(data)
    
    fig = go.Figure()
    
    xy = [[0, 0]]
    r = 2400
    kwargs = {'type': 'circle', 'xref': 'x', 'yref': 'y', 'fillcolor': 'orange', 'layer': 'below'}
    points = [go.layout.Shape(x0=-r, y0=-r, x1=r, y1=r, **kwargs)]
    fig.update_layout(shapes=points)

    frag = data.iloc[apoapsis][first:last]
    fig.add_trace(go.Scatter(
        x=frag.X_MSO.values,
        y=frag.Y_MSO.values,
        name="APO",
        mode="markers", marker={'color':'red'}
    ))
    
    
    frag = data.iloc[periapsis][first:last]
    fig.add_trace(go.Scatter(
        x=frag.X_MSO.values,
        y=frag.Y_MSO.values,
        name="PERI",
        mode="markers", marker={'color':'blue'}
    ))
    
    fig.update_layout(
        title=go.layout.Title(
            text=r"$\text{{Проекция апсидов орбиты КА MESSENGER на плоскость XY ({:%Y-%m-%d} - {:%Y-%m-%d}). }}$".format(
                min(data.index), max(data.index)
            ), x=0.5, xanchor='center'),
        xaxis=go.layout.XAxis(title=go.layout.xaxis.Title(text=r"MSO" + symbol + " X [km]")),
        yaxis=go.layout.YAxis(
            title=go.layout.yaxis.Title(text=r"MSO" + symbol + " Y [km]"),
            scaleanchor="x",
            scaleratio=1
        ),
    )
    fig.show()
    
    #plt.xlabel()
    #plt.ylabel(r"MSO" + symbol + " Y [km]")
    #plt.legend();
    
show_periapo()

We need to rotate the coordinate system to account for aberration: Mercury's own orbital velocity is about 10% of the solar wind velocity and is directed roughly perpendicular to the MSO X axis. The solar wind velocity is taken to be constant for now.
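As a rough back-of-the-envelope check of the angle involved (illustrative numbers only, not fitted values): with Mercury's orbital speed in the 39-59 km/s range and a nominal solar wind speed of 350-400 km/s, the aberration angle arctan(V_Mercury / V_sw) is of the order of 6-9 degrees.

# Illustrative estimate of the aberration angle; both speeds are assumed round numbers
v_mercury = 47.0   # km/s, a typical orbital speed of Mercury
v_sw = 350.0       # km/s, the nominal solar wind speed adopted below
print(np.degrees(np.arctan(v_mercury / v_sw)))   # about 7.7 degrees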

# The distribution of solar wind speeds during solar minimum: Calibration for numerical solar wind modeling constraints on the source of the slow solar wind
# S. L. McGregor  W. J. Hughes  C. N. Arge  M. J. Owens  D. Odstrcil
# https://doi.org/10.1029/2010JA015881
    
sw_velocity = 350 # km/s
# TODO: this is a crude approximation; investigate ways to improve it

def rotation_matrix(theta):
    # Stack of rotation matrices about the Z axis, one per sample in `theta`
    c, s = np.cos(theta.values), np.sin(theta.values)
    zeros, ones = np.zeros_like(c), np.ones_like(c)
    R = np.vstack([[c, -s, zeros],
                   [s, c, zeros],
                   [zeros, zeros, ones]
                  ])
    return R

def aberrate(data, sw_velocity):
    data = data.copy(True)
    data['ABERRATION_ANGLE'] = -np.arctan2(sw_velocity, data.VABS)
    rot = rotation_matrix(data['ABERRATION_ANGLE'][:data.shape[0]]).reshape(3, 3, data.shape[0]).transpose(2,0,1)

    triads = [
        ['X_MSO', 'Y_MSO', 'Z_MSO'],
        ['BX_MSO', 'BY_MSO', 'BZ_MSO'],
        ['DBX_MSO', 'DBY_MSO', 'DBZ_MSO']
    ]

    for t in triads:
        vec = np.array([ data[t].values[:data.shape[0]] ]).transpose(0,2,1)
        resx = np.matmul(rot, vec.T).reshape(data.shape[0], 3)
        for i in range(len(t)):
            data[t[i]] = resx.T[i]
    return data

#data = aberrate(data, sw_velocity)
# duskdawn = nodes[nodes["TYPE"]==1]
# coses = data.COSALPHA[(~np.isnan(data.COSALPHA.VALUE)) & (np.abs(data.COSALPHA.VALUE) < 0.05)].index
# duskdawn = duskdawn.index.intersection(coses)
def sw_orbit_data(data, nodes, orbit_no):
    # Compare the field in the 2-4 h windows before and after periapsis; if the mean
    # components agree to within `ep` nT the orbit is likely to sit in quiet solar
    # wind, and the mean standard deviation of the components is returned.
    ep = 100
    periapsis = nodes[nodes["TYPE"]==1]
    dt00 = periapsis.index[orbit_no] - pd.Timedelta("4h")
    dt01 = periapsis.index[orbit_no] - pd.Timedelta("120m")
    dt10 = periapsis.index[orbit_no] + pd.Timedelta("120m")
    dt11 = periapsis.index[orbit_no] + pd.Timedelta("4h")
    fragment0 = data[dt00:dt01].copy(True)
    fragment1 = data[dt10:dt11].copy(True)
    rmse = np.mean([
        np.std(fragment0.BX_MSO),
        np.std(fragment0.BY_MSO),
        np.std(fragment0.BZ_MSO),
        np.std(fragment1.BX_MSO),
        np.std(fragment1.BY_MSO),
        np.std(fragment1.BZ_MSO)
    ])

    if (abs(np.mean(fragment0.BX_MSO) - np.mean(fragment1.BX_MSO)) < ep and
        abs(np.mean(fragment0.BY_MSO) - np.mean(fragment1.BY_MSO)) < ep and
        abs(np.mean(fragment0.BZ_MSO) - np.mean(fragment1.BZ_MSO)) < ep):
        return rmse
    return None

min_i, min_abs = -1, 9999
for i in range(1090):
    absx = sw_orbit_data(data, nodes, i)
    if absx is not None and absx < min_abs:
        min_abs = absx
        min_i = i
        

print(min_i, min_abs)
# orbit_data() is defined in a later cell; this cell assumes that cell has already been run
exp = orbit_data(data, nodes, min_i)
exp.X_MSO = exp.X_MSO*1e3
exp.Y_MSO = exp.Y_MSO*1e3
exp.Z_MSO = exp.Z_MSO*1e3
orbit_data(data, nodes, min_i)[["X_MSO", "Y_MSO", "Z_MSO", "BX_MSO", "BY_MSO", "BZ_MSO"]].to_csv("messenger_compare.csv")
def orbit_data(data, nodes, orbit_no):
    periapsis = nodes[nodes["TYPE"]==1]
    dt1 = periapsis.index[orbit_no] - pd.Timedelta("120m")
    dt2 = periapsis.index[orbit_no] + pd.Timedelta("120m")
    fragment = data[dt1:dt2].copy(True)
    fragment["BABS"] = np.sqrt(fragment.BX_MSO**2 + fragment.BY_MSO**2 + fragment.BZ_MSO**2)
    return fragment
    """
    timex = list(
        map(
            lambda dt64: datetime.utcfromtimestamp((dt64 - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's')),
            fragment.index.values
        )
    )
    x, y, z = fragment.X_MSO, fragment.Y_MSO, fragment.Z_MSO
    bx, by, bz = fragment.BX_MSO, fragment.BY_MSO, fragment.BZ_MSO
    dbx, dby, dbz = fragment.DBX_MSO, fragment.DBY_MSO, fragment.DBZ_MSO
    r, rxy = fragment.R, fragment.RXY
            
    coords = np.vstack([x, y, z])
    values = np.vstack((bx, by, bz))
    err  = np.vstack([dbx, dby, dbz])
    babs = np.sqrt(bx**2 + by**2 + bz**2)
    # lo, hi = values - err, values + err
    return timex, coords, values, err, babs"""



def advplot(fragment, more_title=""):    
    fig = make_subplots(rows=2,
                        cols=1,
                        subplot_titles=["Magnetic field", "Coordinates"],
                        shared_xaxes=False,
                        shared_yaxes=False)
    
    
    lo = fragment[["BX_MSO", "BY_MSO", "BZ_MSO"]] - fragment[["DBX_MSO", "DBY_MSO", "DBZ_MSO"]].to_numpy()
    hi = fragment[["BX_MSO", "BY_MSO", "BZ_MSO"]] + fragment[["DBX_MSO", "DBY_MSO", "DBZ_MSO"]].to_numpy()
    
    s4=go.Scatter(
            x=pd.concat([pd.Series(fragment.index.values), pd.Series(fragment.index.values)]),
            y=pd.concat([fragment.BABS, -fragment.BABS]),
            mode='lines',
            name=r"$|B|$",
            marker_color='black',
        )
    
    fig.add_trace(
        s4,
        row=1,
        col=1
        )
    s4.on_click(update_point)  # update_point: interactive callback assumed to be defined elsewhere
    
    for i, j, c in zip(["BX_MSO", "BY_MSO", "BZ_MSO"], ["X_MSO", "Y_MSO", "Z_MSO"], ['red', 'green', 'blue']):
        fig.add_trace(
            go.Scatter(
                x=fragment.index,
                y=lo[i],
                line=dict(width=0), 
                mode='lines',
                opacity=0.0,
                name=i.split("_")[0][-1],
                marker_color=c,
                fillcolor=c,
                showlegend=False,
            ),
            row=1,
            col=1
        )
        
        fig.add_trace(
            go.Scatter(
                x=fragment.index,
                y=hi[i],
                mode='lines',
                line=dict(width=0),
                opacity=0.0,
                name=i.split("_")[0][-1],
                marker_color=c,
                showlegend=False,
                fill='tonexty'
            ),
            row=1,
            col=1,
        )
    
        s3=go.Scatter(
                       x=fragment.index,
                       y=fragment[i],
                       mode='lines',
                       name=i.split("_")[0][-1],
                       marker_color=c
                      )  
               
        fig.add_trace(
            s3,
            row=1,
            col=1
        )
        s3.on_click(update_point)
        
        
        fig.add_trace(
            go.Scatter(
                x=fragment.index,
                y=fragment[j],
                mode='lines',
                name=j,
                marker_color=c,
                showlegend=False
            ),
            row=2,
            col=1
        )
        
    fig.show()
   
    
def export_orbit(nodes, orbit_no, normalize=False, more_title=""):
    # Write one orbit (±120 min around periapsis) to a fixed-width text file.
    # `normalize` and `more_title` are kept only for compatibility with older calls.
    fragment = orbit_data(data, nodes, orbit_no)
    fname = "orbit_{:04d}.dat".format(orbit_no + 1)
    with open(fname, 'wb') as f:
        for i in range(len(fragment)):
            line = " {} {:>13} {:>9} {:>9} {:>7} {:>7} {:>7}\n".format(
                fragment.index[i].strftime("%Y %m %d %H %M %S"),
                "{:.2f}".format(fragment.X_MSO[i]), "{:.2f}".format(fragment.Y_MSO[i]),
                "{:.2f}".format(fragment.Z_MSO[i]), "{:.2f}".format(fragment.BX_MSO[i]),
                "{:.2f}".format(fragment.BY_MSO[i]), "{:.2f}".format(fragment.BZ_MSO[i]))
            f.write(line.encode('utf8'))
advplot(orbit_data(data, nodes, 1))
 
    
    """
    with plt.style.context(('classic')):
        mp.rcParams['font.size'] = 22
        mp.rcParams['axes.labelsize'] = 32
        #mp.rcParams['figure.figsize'] = (15, 10)
        fig = plt.figure(num=None, figsize=(12, 6), dpi=80, facecolor='w', edgecolor='k')
        ax = fig.add_subplot(211)
        plt.grid(which='minor')
        plt.grid(which='major')
        plt.subplots_adjust(bottom=-1.0)
        plt.setp(ax.xaxis.get_majorticklabels(), rotation="vertical")
        if more_title != "":
            more_tile = " :: " + more_title
        plt.title("%s%s" % (timex[0].strftime("%Y-%m-%d"), more_title))

        #labels = { "coord": [r"$X_{MSO}$", r"$Y_{MSO}$", r"$Z_{MSO}$"], "bfield": [r"$B_x$", r"$B_y$", r"$B_z$"]}
        #colors = ["red", "green", "blue"]
        #lines = []
        #for i in range(values.shape[0]):
        #    line = ax.plot(timex, values[i], label=labels["bfield"][i], color=colors[i])
        #    lines.append(line)
        #ax.plot(timex, babs, color='k', linewidth=1.4)
        #ax.plot(timex, -babs, color='k', linewidth=1.4)
        
        
        plt.ylabel("Магнитное поле [нТл]")
        diffax = ax.twinx()
        

        deltab = np.abs(np.diff(babs))
        diffax.set_ylim((0, np.max(deltab)*2))
        line = diffax.plot(timex[:-1], deltab, color='magenta', label=r'$\Delta$B')
        lines.append(line)
        for tl in diffax.get_yticklabels():
            tl.set_color('magenta')
        diffax.set_ylabel(r'$|\Delta B_{abs}|$ [нТл]', color='magenta')
        #lines = lines[0] + lines[1] + lines[2] + lines[3]
    
        ax.xaxis.set_major_formatter(dates.DateFormatter('%H:%M'))
        ax.xaxis.set_minor_locator(AutoMinorLocator(4))
        
        ax.legend()
        

        ax = fig.add_subplot(212)
        plt.grid(which='minor')
        plt.grid(which='major')
        ax.xaxis.set_major_formatter(dates.DateFormatter('%H:%M'))
        ax.xaxis.set_minor_locator(AutoMinorLocator(4))
        plt.setp(ax.xaxis.get_majorticklabels(), rotation="vertical")
        plt.xlabel("Время")
        plt.ylabel("Расстояние [км]")
        #ax.plot(fragment.RHO_DIPOLE, color='k', linewidth=2, label=r'$\rho_{D}$')
        for i in range(coords.shape[0]):
            line = ax.plot(timex, coords[i], label=labels["coord"][i], color=colors[i])
            lines.append(line)
        ax.legend()"""
    
first_orbit = 0
for i in range(nodes[nodes.TYPE==1].shape[0]):
    export_orbit(nodes, first_orbit + i, True, False)
plt.plot(data["BZ_MSO"])
# http://ccmc.gsfc.nasa.gov/RoR_WWW/presentations/Dipole.pdf
# rho   - total distance of the spacecraft to Mercury's center in km
# theta - hermomagnetic latitude in radians (NOT degrees)
# B_0   - hermomagnetic dipole field approximation
# Bx = 3M xz/r^5
# By = 3M yz/r^5
# Bz = M (3z^2-r^2)/r^5

#data["BABS_DIPOLE"]=np.abs(M_0)*np.sqrt(1 + 3*np.cos(data.THETA_DIPOLE)**2) / data.RHO_DIPOLE**3
#data["BX_DIPOLE"]=3*M_0*z*x / data.RHO_DIPOLE**5
#data["BY_DIPOLE"]=3*M_0*z*y / data.RHO_DIPOLE**5
#data["BZ_DIPOLE"]=M_0*(3*z**2 - data.RHO_DIPOLE**2)/data.RHO_DIPOLE**5
    
# Do not modify the data array, just calc along what's been given
def dipole_profile(coords, psi, offset_x, offset_y, offset_z):
    x, y, z = coords.X_AB + offset_x, coords.Y_AB + offset_y, coords.Z_AB + offset_z
    #if aberrated:
    #    x, y, z = data.X_AB + offset[0], data.Y_AB + offset[1], data.Z_AB + offset[2]
    #else:
    #    x, y, z = data.X_MSO + offset[0], data.Y_MSO + offset[1], data.Z_MSO + offset[2]
    rho=np.sqrt(x**2 + y**2 + z**2)
    theta=np.arcsin(z/rho)    
    babs=np.abs(M_0)/ rho**4 * np.sqrt(3*z**2 + rho**2)
    p=z*np.cos(psi) - x*np.sin(psi)
    Br=M_0/rho**5
    bx=-Br*(- rho**2 *np.sin(psi) - 3.*x*p)
    by= Br*(3.*y*p)
    bz=-Br*(rho**2 *np.cos(psi) - 3.*z*p)
    return bx, by, bz


def dipole_magnitude(coords, psi, offset_x, offset_y, offset_z):
    bx, by, bz = dipole_profile(coords, psi, offset_x, offset_y, offset_z)
    field_dipole = np.sqrt(bx**2 + by**2 + bz**2)
    return field_dipole
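A small standalone sanity check (not part of the original pipeline): for psi = 0 and zero offsets, dipole_profile should reduce to the axial-dipole expressions quoted in the comment block above, Bx = 3Mxz/r^5, By = 3Myz/r^5, Bz = M(3z^2 - r^2)/r^5. The test point below is arbitrary.

# Verify the psi = 0 limit of dipole_profile against the axial-dipole formulas
test = pd.DataFrame({"X_AB": [1.2 * R_M], "Y_AB": [0.5 * R_M], "Z_AB": [0.8 * R_M]})
bx, by, bz = dipole_profile(test, psi=0.0, offset_x=0, offset_y=0, offset_z=0)
x, y, z = test.X_AB, test.Y_AB, test.Z_AB
r = np.sqrt(x**2 + y**2 + z**2)
assert np.allclose(bx, 3 * M_0 * x * z / r**5)
assert np.allclose(by, 3 * M_0 * y * z / r**5)
assert np.allclose(bz, M_0 * (3 * z**2 - r**2) / r**5)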


#dipole_profile(data)
from scipy.optimize import curve_fit  # needed for the dipole fit below

def exp_vs_dipole(nodes, orbit_no, aberrated=True, more_title="", plot_enabled=True):
    periapsis = nodes[nodes["TYPE"]==1]
    dt1 = periapsis.index[orbit_no] - pd.Timedelta("1h")
    dt2 = periapsis.index[orbit_no] + pd.Timedelta("1h")
    fragment = data[dt1 : dt2]
    
    if aberrated:
        x, y, z = fragment["X_AB"], fragment["Y_AB"], fragment["Z_AB"]
        bx, by, bz = fragment["BX_AB"], fragment["BY_AB"], fragment["BZ_AB"]
        dbx, dby, dbz = fragment["DBX_AB"], fragment["DBY_AB"], fragment["DBZ_AB"]
        rho, rxy = fragment["RHO_AB"], fragment["RXY_AB"]
    else:
        x, y, z = fragment["X_MSO"], fragment["Y_MSO"], fragment["Z_MSO"]
        bx, by, bz = fragment["BX_MSO"], fragment["BY_MSO"], fragment["BZ_MSO"]
        dbx, dby, dbz = fragment["DBX_MSO"], fragment["DBY_MSO"], fragment["DBZ_MSO"]
        rho, rxy = fragment["RHO"], fragment["RXY"]
    
    coords = pd.concat([x, y, z], axis=1)
    field_exp = np.sqrt(bx**2 + by**2 + bz**2)
    
    popt, pcov = curve_fit(dipole_magnitude, coords, field_exp)
    #print(popt)
    dipolex, dipoley, dipolez = dipole_profile(coords, popt[0], popt[1], popt[2], popt[3])
    #fragment['BX_DIPOLE'], fragment['BY_DIPOLE'], fragment['BZ_DIPOLE']
    
    
    field_dipole = np.sqrt(dipolex**2 + dipoley**2 + dipolez**2) # fragment['BABS_DIPOLE']
    
    #print(field_exp.idxmax(), field_dipole.idxmax())
    if plot_enabled:
        plt.plot(field_exp, color='b')
        plt.plot(field_dipole, color='g')
        plt.plot(fragment['RHO_DIPOLE']/100, color='k')
        plt.xlabel("Time")
        plt.ylabel("Magnetic field [nT]")
        plt.gca().xaxis.set_major_formatter(dates.DateFormatter('%H:%M'))
        plt.gca().xaxis.set_minor_locator(AutoMinorLocator(4))
    return 

dipole_params = []

ab = False  # the aberrated (_AB) columns only exist if aberrate() was applied above
for i in range(0, 4000):
    dipole_params.append(exp_vs_dipole(nodes, i, ab, "", False))
import sys,os,os.path
from datetime import datetime
from matplotlib import dates

from matplotlib.ticker import AutoMinorLocator, LinearLocator, AutoLocator
import matplotlib as mpl

base_name = "MAGMSOSCIAVG%s%03d_%s_V06.TAB"
data_limits = { 2011: [82, 365], 2012: [1, 366], 2013: [1, 365], 2014: [1, 365], 2015: [1, 120] }

def advanced_plot(pictype, data, normalize=False, more_title=""):
    data_np = data.to_numpy().T
    x, y, z = data_np[0], data_np[1], data_np[2]
    if normalize:
        bx, by, bz = data_np[3] - data_np[-3], data_np[4] - data_np[-2], data_np[5] - data_np[-1]
    else:
        bx, by, bz = data_np[3], data_np[4], data_np[5]
    dbx, dby, dbz = data_np[6], data_np[7], data_np[8]
    # We drop the last record from the input file as it belongs to the next day
    timex = list(map(lambda dt64: datetime.utcfromtimestamp((dt64 - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's')), data.index.values))

    if pictype == "bfield":
        data = np.vstack([bx, by, bz])
        err  = np.vstack([dbx, dby, dbz])
        babs = np.apply_along_axis(lambda p: np.sqrt(sum(p**2)), 0, data)
    elif pictype == "coord":
        data = np.vstack([x, y, z])
        err  = 0.0
    
    lo, hi = data - err, data + err
    data, lo, hi = data, lo, hi

    fig = plt.figure(num=None, figsize=(12, 6), dpi=80, facecolor='w', edgecolor='k')
    ax = fig.add_subplot(111)
    ax.xaxis.set_major_formatter(dates.DateFormatter('%H:%M'))
    ax.xaxis.set_minor_locator(AutoMinorLocator(4))
    plt.grid(which='minor')
    plt.grid(which='major')
    plt.setp(ax.xaxis.get_majorticklabels(), rotation="vertical")
    plt.subplots_adjust(bottom=.3)
    plt.title("%s :: %s" % (timex[0].strftime("%Y-%m-%d"), more_title))


    labels = { "coord": ["x", "y", "z"], "bfield": ["Bx", "By", "Bz"]}
    colors = ["blue", "magenta", "red"]

    lines = []
    for i in range(data.shape[0]):
        line = ax.plot(timex, data[i], label=labels[pictype][i], color=colors[i])
        lines.append(line)
        plt.fill_between(timex, lo[i], hi[i], facecolor=colors[i], interpolate=True, alpha=0.3, color="none", edgecolor="k", linewidth=0.0)

    if pictype == 'bfield':
        ax.plot(timex, babs, color='k', linewidth=1.4)
        ax.plot(timex, -babs, color='k', linewidth=1.4)
        diffax = ax.twinx()
        deltab = np.apply_along_axis(lambda p: np.sqrt(sum(p**2)), 0, np.diff(data, axis=1))
        line = diffax.plot(timex[:-1], deltab, color='green', label=r'$\Delta$B')
        lines.append(line)
        for tl in diffax.get_yticklabels():
            tl.set_color('green')
        ax.set_ylabel(r'$B$ [nT]', color='k')
        diffax.set_ylabel(r'$|\Delta B|$ [nT]', color='green')
        lines = lines[0] + lines[1] + lines[2] + lines[3]
    else:
        ax.set_ylabel(r'Distance to Mercury center [km]', color='k')
        lines = lines[0] + lines[1] + lines[2]

    labels = [l.get_label() for l in lines]
    ax.legend(lines, labels, loc=1)


def a2000_vs_dipole_plot(data, component):
    data_np = data.to_numpy().T
    x, y, z = data_np[0], data_np[1], data_np[2]
    if component == "x":
        bx, by, bz = data_np[3], data_np[-3], data_np[3] - data_np[-3]
    elif component == "y":
        bx, by, bz = data_np[4], data_np[-2], data_np[4] - data_np[-2]
    elif component == "z":
        bx, by, bz = data_np[5], data_np[-1], data_np[5] - data_np[-1]
    dbx, dby, dbz = data_np[6], data_np[7], data_np[8]
    # We drop the last record from the input file as it belongs to the next day
    timex = list(map(lambda dt64: datetime.utcfromtimestamp((dt64 - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's')), data.index.values))

    data = np.vstack([bx, by, bz])
    err  = np.vstack([dbx, dby, dbz])
    babs = np.apply_along_axis(lambda p: np.sqrt(sum(p**2)), 0, data)
    
    lo, hi = data - err, data + err
    data, lo, hi = data, lo, hi

    fig = plt.figure(num=None, figsize=(12, 6), dpi=80, facecolor='w', edgecolor='k')
    ax = fig.add_subplot(111)
    ax.xaxis.set_major_formatter(dates.DateFormatter('%H:%M'))
    ax.xaxis.set_minor_locator(AutoMinorLocator(4))
    plt.grid(which='minor')
    plt.grid(which='major')
    plt.setp(ax.xaxis.get_majorticklabels(), rotation="vertical")
    plt.subplots_adjust(bottom=.3)
    plt.title(timex[0].strftime("%Y-%m-%d"))


    labels = { "bfield": ["$B_{data}$", "$B_{dipole}$", "$\Delta B$"]}
    colors = ["red", "blue", "magenta"]

    lines = []
    for i in range(data.shape[0]):
        line = ax.plot(timex, data[i], label=labels["bfield"][i], color=colors[i])
        lines.append(line)
        plt.fill_between(timex, lo[i], hi[i], facecolor=colors[i], interpolate=True, alpha=0.3, color="none", edgecolor="k", linewidth=0.0)

    ax.set_ylabel(r'$B$ [nT]', color='k')
    lines = lines[0] + lines[1] + lines[2]

    labels = [l.get_label() for l in lines]
    ax.legend(lines, labels, loc=1)
def save_preprocessed(data):
    data.to_csv("/home/dp/messenger/uru/data_disser.csv")
# save_preprocessed(data)
# data = pd.read_csv("/home/dp/messenger/uru/data_disser.csv")
# cos_array, cerc_vec, data_ab, dt and spread are assumed to be defined in earlier (not shown) cells
def select_orbit_section(base_cos, cos_spread):
    threshold = np.where((cos_array < base_cos + cos_spread) & (cos_array > base_cos - cos_spread))[0]
    return np.array(list(set(np.where(cerc_vec == -1)[0]).intersection(threshold)))
        
def compare_approach_types():
    cos_spread = 0.002
    for base_cos in np.arange(-0.95, 0.95, 0.05):
        closest_approaches = select_orbit_section(base_cos, cos_spread)
        for appr in closest_approaches[:1]:
            advanced_plot("bfield", data_ab[dt[appr-spread]:dt[appr+spread]], True, base_cos)
            advanced_plot("coord", data_ab[dt[appr-spread]:dt[appr+spread]], False, base_cos)
        
def get_x_spread(approaches):
    deltas = []
    for appr in approaches:
        _min = data_ab["X_MSO"][dt[appr-spread]:dt[appr+spread]].min()
        _max = data_ab["X_MSO"][dt[appr-spread]:dt[appr+spread]].max()
        deltas.append(_max - _min)
    return sum(deltas) / len(deltas)
    
# Selected manually to achieve minimum X_MSO spread between points of entry and departure
base_cos = 0.0
cos_spread = 0.3

#for cos_spread in np.arange(0.1, 0.9, 0.1):
#    closest_approaches = select_orbit_section(base_cos, cos_spread)
#    print cos_spread, len(closest_approaches)*1.0/np.where(cerc_vec==-1)[0].shape[0], get_x_spread(closest_approaches)

# Number of points around each closest approach
spread = 100
closest_approaches = select_orbit_section(base_cos, cos_spread)
f1 = open("orbits-times.txt", "w")
f2 = open("orbits-to-cluster.csv", "w")
for appr in closest_approaches:
    df1 = data_ab[["BX_MSO", "BY_MSO", "BZ_MSO"]][dt[appr-spread]:dt[appr+spread]]
    df2 = data_ab[["B_X", "B_Y", "B_Z"]][dt[appr-spread]:dt[appr+spread]]
    df = df1[["BX_MSO", "BY_MSO", "BZ_MSO"]].copy()
    df['BX_MSO'] = df1.BX_MSO - df2.B_X
    df['BY_MSO'] = df1.BY_MSO - df2.B_Y
    df['BZ_MSO'] = df1.BZ_MSO - df2.B_Z
    if not df.isnull().any().any():
        f2.write(",".join(map(lambda x: str(x), df.values.flatten(order='F'))))
        f2.write("\n")
        f1.write(str(data_ab[["BX_MSO", "BY_MSO", "BZ_MSO"]][dt[appr]:dt[appr]].index.values[0]))
        f1.write("\n")
f1.close()
f2.close()
periapsis, apoapsis = argrelextrema(data['RHO_DIPOLE'].values, np.less), argrelextrema(data['RHO_DIPOLE'].values, np.greater)

peri = data.iloc[periapsis];
babs = np.sqrt(peri.BX_AB**2 + peri.BY_AB**2 + peri.BZ_AB**2)

apo = data.iloc[apoapsis];
babs = np.sqrt(apo.BX_AB**2 + apo.BY_AB**2 + apo.BZ_AB**2)

plt.hist(babs[~np.isnan(babs)], bins=100);