Midterm 2, Fall 2024: Netflix and Bills
Version 1.0.3
All of the header information is important. Please read it.
Topics and number of exercises: This problem builds on your knowledge of General Python, Tabular Data, Pandas, SQL and SQLite, and NumPy. It has 10 exercises, numbered 0 to 9. There are 17 available points; however, the threshold to earn 100% is 12 points. (Therefore, once you hit 12 points, you can stop. There is no extra credit for exceeding this threshold.)
Exercise ordering: Each exercise builds logically on previous exercises, but you may solve them in any order. That is, if you can't solve an exercise, you can still move on and try the next one. Use this to your advantage, as the exercises are not necessarily ordered in terms of difficulty. Higher point values generally indicate more difficult exercises.
Demo cells: Code cells starting with the comment ### Run Me!!! load results from prior exercises applied to the entire data set and use those to build demo inputs. These must be run for subsequent demos to work properly, but they do not affect the test cells. The data loaded in these cells may be rather large (at least in terms of human readability). You are free to print or otherwise use Python to explore them, but we may not print them in the starter code.
Debugging your code: Right before each exercise's test cell, there is a block of text explaining the variables available to you for debugging. You may use these to test your code and can print/display them as needed (be careful when printing large objects; you may want to print the head or chunks of rows at a time).
Exercise point breakdown:
Exercise 0: 1 point(s)
Exercise 1: 1 point(s) - FREE
Exercise 2: 3 point(s)
Exercise 3: 2 point(s)
Exercise 4: 2 point(s)
Exercise 5: 0 point(s) - FREE
Exercise 6: 1 point(s)
Exercise 7: 2 point(s)
Exercise 8: 3 point(s)
Exercise 9: 2 point(s)
Final reminders:
Background. Netflix is a firm in the entertainment industry which has grown to prominence in the streaming era of movies and television. The company distributes both acquired and original programming via a streaming service.
Your overall task. Your goal is to attempt to evaluate whether there is a relationship between the programming that is distributed by Netflix and the financial performance of the firm. You will do this in two main steps:
Once you have obtained the results from both steps, you will use the results to build a model which will attempt to evaluate whether there is a relationship between the programs and the firm's financial standing.
The datasets. There are three sources of data which you will use to solve the following exercises.
The financial data (source 1) is provided as a Python list of dictionaries. You will load this data into a Pandas DataFrame. The daily Netflix data (source 2) and original programming information (source 3) are contained in a SQLite database.
SQLite's syntax documentation can be found here. You may find other resources online are also useful for solving the SQL problems, but not all SQL dialects work the same way. Make sure your solution works with SQLite!
### Global imports
import dill
from cse6040_devkit import plugins, utils
from pprint import pprint, pformat
import numpy as np
import pandas as pd
import sqlite3
import random
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import SequentialFeatureSelector
# Load starting data and define a database connection
stock_data = utils.load_object_from_publicdata('raw_stock_data_nflx.dill')
conn = sqlite3.connect('resource/asnlib/publicdata/NetflixDb.sqlite_db')
load_data
Your task: define load_data as follows:
Load the data contained in records into a Pandas DataFrame. See the instructions below for the requirements.
Inputs:
- records: A list of dictionaries. Each dictionary contains information for a single row of data.
Return: full_data: A Pandas DataFrame containing the data held in records. The DataFrame should contain the following columns:
- Firm (renamed from the Stock key)
- Date, with dtype datetime64[ns, America/New_York]
- Open
- Close
- Volume
Requirements/steps:
- Do not drop any NaN values!
Hints:
- We have provided a helper, convert_timezone_types(), to make this easier. convert_timezone_types() takes an input of a pd.Series with dtype "string" and then outputs a pd.Series with dtype "datetime64[ns, America/New_York]". See the demo code for more examples.
Example. A correct implementation should produce, for the demo, the following output. Run the cell below to view it.
stock_subsample_df_demo_CORRECT = utils.load_object_from_publicdata('stock_subsample_df_demo.dill')
convert_timezone_types = utils.load_object_from_publicdata('convert_timezone_types.dill')
display(stock_subsample_df_demo_CORRECT)
### Solution - Exercise 0
def load_data(records: list) -> pd.DataFrame:
### BEGIN SOLUTION
# A four-line solution, for the TAs --------------------------------------------
# full_data = pd.DataFrame(records)
# full_data['Date'] = convert_timezone_types(full_data['Date'])
# full_data = full_data.rename(columns={'Stock': 'Firm'})
# return full_data
# ------------------------------------------------------------------------------
# Build the full DataFrame
full_data = pd.DataFrame(records)
# Convert the data type (Here are two suggested approaches)
### APPROACH 1: Pass in a series
date_vector_1 = convert_timezone_types(full_data['Date'])
### APPROACH 2: Use .apply and timezone conversion ---------------------
# date_vector_2 = full_data['Date'].apply(convert_timezone_types)
### They should be the same thing!
# assert (date_vector_1 == date_vector_2).all()
full_data['Date'] = date_vector_1
# Rename the columns
full_data = full_data.rename(columns={'Stock': 'Firm'})
# Return our dataframe
return full_data
### END SOLUTION
### Demo function call
### `convert_timezone_types` demo ---------------------------------------------
# This helper function will help you change the data-type
# -- Input: pd.Series, with dtype "string"
# -- Output: pd.Series, with dtype "datetime64[ns, America/New_York]"
type_conversion_demo = pd.DataFrame({
'Date': ['2017-11-15 00:00:00-05:00', '2021-01-15 00:00:00-05:00'],
'DemoColumn': [1, 2]
})
# Convert the type!
type_conversion_demo['Date'] = convert_timezone_types(type_conversion_demo['Date'])
assert type_conversion_demo['Date'].dtype == 'datetime64[ns, America/New_York]' # It works!
### `load_data` demo ----------------------------------------------------------
# We'll sample the dataset down to 10 records, just to make debugging easier.
random.seed(6040)
stock_data_subsample = random.sample(stock_data, k=10)
# Here's what your solution produces!
print('Here is what your loaded data looks like in Pandas:')
stock_subsample_df = load_data(stock_data_subsample)
display(stock_subsample_df)
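If you are curious how a helper like convert_timezone_types might work internally, here is a minimal sketch. The name convert_timezone_types_sketch and the parsing approach are assumptions for illustration only; use the provided helper in your solution.

```python
import pandas as pd

def convert_timezone_types_sketch(s: pd.Series) -> pd.Series:
    # Parse the offset-bearing date strings into UTC timestamps,
    # then convert them to the America/New_York timezone.
    return pd.to_datetime(s, utc=True).dt.tz_convert('America/New_York')

demo = pd.Series(['2017-11-15 00:00:00-05:00', '2021-01-15 00:00:00-05:00'])
converted = convert_timezone_types_sketch(demo)
print(converted.dtype)  # datetime64[ns, America/New_York]
```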
The cell below will test your solution for load_data (exercise 0). The testing variables will be available for debugging under the following names in a dictionary format.
- input_vars: Input variables for your solution.
- original_input_vars: Copy of input variables from prior to running your solution. Any key:value pair in original_input_vars should also exist in input_vars; otherwise, the inputs were modified by your solution.
- returned_output_vars: Outputs returned by your solution.
- true_output_vars: The expected output. This should "match" returned_output_vars based on the question requirements; otherwise, your solution is not returning the correct output.
### Test Cell - Exercise 0
# Load testing utility
with open('resource/asnlib/publicdata/execute_tests', 'rb') as f:
execute_tests = dill.load(f)
# Execute test
passed, test_case_vars = execute_tests(func=plugins.sqlite_blocker(load_data),
ex_name='load_data',
key=b'a9pptsbNJew33Kjoo3e2RkFk0FT2N1lXclu3U9zq8Aw=',
n_iter=100)
# Assign test case vars for debugging
input_vars, original_input_vars, returned_output_vars, true_output_vars = test_case_vars
assert passed, 'The solution to load_data did not pass the test.'
### BEGIN HIDDEN TESTS
passed, test_case_vars = execute_tests(func=plugins.sqlite_blocker(load_data),
ex_name='load_data',
key=b'deVhBT3kO4xkGnbJLP5zPloS7UHLzunkF-JOvkYVvhI=',
n_iter=10,
hidden=True)
input_vars, original_input_vars, returned_output_vars, true_output_vars = test_case_vars
assert passed, 'The solution to load_data did not pass the test.'
### END HIDDEN TESTS
print('Passed! Please submit.')
### Run Me!!!
stock_df = utils.load_object_from_publicdata('stock_df')
preview_data
Example: we have defined preview_data as follows:
Preview the three data sources as outlined below.
Inputs:
- records: A Pandas DataFrame containing the full_data output generated by Exercise 0.
- conn: A SQLite connection to our database.
Return: A tuple, containing the following elements:
- records_preview: A Pandas DataFrame containing 10 entries.
- nflx_originals_preview: A Pandas DataFrame containing 5 different, random entries from the nflx_originals SQL database table.
- nflx_top_preview: A Pandas DataFrame containing 5 different, random entries from the nflx_top SQL database table.
Requirements/steps:
- records_preview: If there are fewer than 10 entries in records, all entries should be returned.
- nflx_originals_preview: The selected entries may be any 5 rows from the original table.
- nflx_top_preview: The selected entries may be any 5 rows from the original table.
A solution is provided for you below. Run it and the test cell to earn your free point, then submit the assignment!
### Solution - Exercise 1
def preview_data(records: list, conn: sqlite3.Connection) -> tuple:
    # Take the first 10 rows of the records
records_preview = records.head(n=10)
# Select everything from nflx_originals and randomly sample 5 rows
nflx_originals_preview = pd\
.read_sql('SELECT * FROM nflx_originals', conn)\
.sample(5)
# Select everything from nflx_top and randomly sample 5 rows
nflx_top_preview = pd\
.read_sql('SELECT * FROM nflx_top', conn)\
.sample(5)
return records_preview, nflx_originals_preview, nflx_top_preview
### Demo function call
records_preview, nflx_originals, nflx_top = preview_data(stock_df, conn)
print('Preview of Stock Data')
display(records_preview)
print('------------------------------------------------------------------------')
print('Preview of Netflix Originals Table')
display(nflx_originals)
print('------------------------------------------------------------------------')
print('Preview of Netflix Top Programs Table')
display(nflx_top)
The test cell below will always pass. Please submit to collect your free points for preview_data (exercise 1).
### Test Cell - Exercise 1
print('Passed! Please submit.')
### Run Me!!!
calculate_daily_growth_demo_input = utils.load_object_from_publicdata('calculate_daily_growth_demo_input')
calculate_daily_growth
Your task: define calculate_daily_growth as follows:
Calculate the daily growth for a given firm, as outlined below.
Inputs:
- stock_records: a Pandas DataFrame. It contains the full stock data loaded in exercise 0.
- firm: a string specifying a particular firm's stock.
Return: stock_growth: a Pandas DataFrame with the following columns:
- Date
- Firm
- Open
- Close
- Volume
- Day_Growth
Requirements/steps:
- The output is the input data for the given firm, with Day_Growth appended as a new column. The growth should be specific to the firm specified by the firm parameter.
- Let $\text{close}_{-x}$ denote the value of Close, offset by $x$. So, $\text{close}_{-1}$ is the value of Close, offset by 1 position (that is, the previous day's close).
- Fill with 0 any NaN values that appear when you initially calculate the growth.
- The output (stock_growth) should contain the same number of records as stock_records for that firm.
Example. A correct implementation should produce, for the demo, the following output. Run the cell below to view it.
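The lag-and-ratio idea can be seen in isolation on a tiny, hypothetical price series (these numbers are made up, not the exam data), using shift to build the previous close:

```python
import pandas as pd

# Hypothetical closing prices, in date order
closes = pd.Series([100.0, 110.0, 99.0])
yest_close = closes.shift(1)              # Close offset by 1 position
day_growth = (closes / yest_close) - 1    # first value is NaN (no prior close)
day_growth = day_growth.fillna(0)         # fill the initial NaN with 0
print(day_growth.round(4).tolist())  # [0.0, 0.1, -0.1]
```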
print("Demo output for Netflix:")
daily_growth_df_CORRECT = utils.load_object_from_publicdata('daily_growth_df.dill')
display(daily_growth_df_CORRECT)
### Solution - Exercise 2
def calculate_daily_growth(stock_records: pd.DataFrame, firm: str) -> pd.DataFrame:
### BEGIN SOLUTION
# Filter the DataFrame for the stock
stockdf = stock_records[stock_records['Firm']==firm].copy(deep=True)
# Remove NA values in a copy.
# We'll join everything back together at the end.
stockdf2 = stockdf.copy(deep=True)
stockdf2 = stockdf2.dropna()
# Lag the close and calculate the daily growth
stockdf2['Yest_Close'] = stockdf2.groupby('Firm')['Close'].shift(1)
stockdf2['Day_Growth'] = (stockdf2['Close']/stockdf2['Yest_Close']) - 1
# Merge everything back together and fill missing values with 0
output = pd.merge(
stockdf,
stockdf2,
how='outer',
left_index=True,
right_index=True,
suffixes=(None, '_y'))[
['Date', 'Firm', 'Open', 'Close', 'Volume', 'Day_Growth']
]
output['Day_Growth'] = output['Day_Growth'].fillna(0)
# Return the result
return output.reset_index(drop=True)
### END SOLUTION
### Demo function call
print("Demo daily growth for Netflix's stock:")
daily_growth_nflx_demo = calculate_daily_growth(
stock_records=calculate_daily_growth_demo_input,
firm='NFLX'
)
display(daily_growth_nflx_demo)
The cell below will test your solution for calculate_daily_growth (exercise 2). The testing variables will be available for debugging under the following names in a dictionary format.
- input_vars: Input variables for your solution.
- original_input_vars: Copy of input variables from prior to running your solution. Any key:value pair in original_input_vars should also exist in input_vars; otherwise, the inputs were modified by your solution.
- returned_output_vars: Outputs returned by your solution.
- true_output_vars: The expected output. This should "match" returned_output_vars based on the question requirements; otherwise, your solution is not returning the correct output.
### Test Cell - Exercise 2
# Load testing utility
with open('resource/asnlib/publicdata/execute_tests', 'rb') as f:
execute_tests = dill.load(f)
# Execute test
passed, test_case_vars = execute_tests(func=plugins.sqlite_blocker(calculate_daily_growth),
ex_name='calculate_daily_growth',
key=b'a9pptsbNJew33Kjoo3e2RkFk0FT2N1lXclu3U9zq8Aw=',
n_iter=100)
# Assign test case vars for debugging
input_vars, original_input_vars, returned_output_vars, true_output_vars = test_case_vars
assert passed, 'The solution to calculate_daily_growth did not pass the test.'
### BEGIN HIDDEN TESTS
passed, test_case_vars = execute_tests(func=plugins.sqlite_blocker(calculate_daily_growth),
ex_name='calculate_daily_growth',
key=b'deVhBT3kO4xkGnbJLP5zPloS7UHLzunkF-JOvkYVvhI=',
n_iter=10,
hidden=True)
input_vars, original_input_vars, returned_output_vars, true_output_vars = test_case_vars
assert passed, 'The solution to calculate_daily_growth did not pass the test.'
### END HIDDEN TESTS
print('Passed! Please submit.')
### Run Me!!!
stock_daily_growths_df = utils.load_object_from_publicdata('stock_daily_growths_df')
stock_daily_growths_reduced_df = utils.load_object_from_publicdata('stock_daily_growths_reduced_df')
calculate_annual_growth
Your task: define calculate_annual_growth as follows:
Calculate the annual growth for every firm and join the results to a copy of the original stock_records DataFrame.
Inputs:
- stock_records: a Pandas DataFrame. It contains the results calculated by exercise 2.
Return: annual_growth: a Pandas DataFrame with the same columns as stock_records, along with columns for:
- Year
- Annual_Growth
Requirements/steps:
- Year should be extracted from the Date column of stock_records. Year is expected to be a string in the final output.
- Annual_Growth is the product of (Day_Growth + 1), grouped by firm and year. Make sure to subtract 1 at the end.
Example. A correct implementation should produce, for the demo, the following output. Run the cell below to view it.
We have also provided annual_growth_grouped_df_demo, which contains information useful for checking your work. If you wish to use it, uncomment the code below to view it.
You should notice in this result that the same value for Annual_Growth appears in each row for a given Firm and Year. For example, rows where AAPL is the Firm and Year is '2018' all have Annual_Growth of -0.053902, and NFLX rows for '2020' have 0.671138.
The Annual_Growth is calculated by adding 1 to each Day_Growth observation, multiplying all of them together, then subtracting 1 from the product.
Mathematically, let $g^{(f,y)}_i$ represent the Day_Growth value for firm $f$, year $y$, and observation $i$. Then the Annual_Growth value for the same firm ($G^{(f, y)}$) is given by:
$$G^{(f, y)} = \left[\prod_i \left(1 + g^{(f,y)}_i\right)\right] - 1$$
print("Demo annual growth:")
annual_growth_demo_df_CORRECT = utils.load_object_from_publicdata('annual_growth_demo_df.dill')
display(annual_growth_demo_df_CORRECT)
### Uncomment these lines for additional information!
# print("Annual growth, grouped together:")
# annual_growth_grouped_df_demo = utils.load_object_from_publicdata('annual_growth_grouped_df_demo.dill')
# display(annual_growth_grouped_df_demo)
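The grouped-product step can be sketched with groupby(...).prod() on toy numbers (the firm, years, and growth values below are made up for illustration):

```python
import pandas as pd

# Toy daily growths for one firm across two years
df = pd.DataFrame({
    'Firm': ['NFLX'] * 4,
    'Year': ['2019', '2019', '2020', '2020'],
    'Day_Growth': [0.10, -0.05, 0.02, 0.03],
})
# Annual growth: product of (1 + Day_Growth) within each (Firm, Year), minus 1
annual = (1 + df['Day_Growth']).groupby([df['Firm'], df['Year']]).prod() - 1
print(annual.round(4))
```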
### Solution - Exercise 3
def calculate_annual_growth(stock_records: pd.DataFrame) -> pd.DataFrame:
### BEGIN SOLUTION
# Create a copy
stockdf = stock_records.copy(deep=True)
# Extract the year
stockdf['Year'] = stockdf.Date.dt.strftime('%Y')
# Create the growth ratio and calculate the annual growth
stockdf['Growth_Ratio'] = 1 + stockdf['Day_Growth']
stockdf2 = stockdf[['Year', 'Growth_Ratio', 'Firm']].copy(deep=True)
resultdf = stockdf2.groupby(['Year', 'Firm'])\
.prod('Growth_Ratio') - 1
# Merge everything back together
resultdf = resultdf.reset_index()
resultdf.columns = ['Year', 'Firm', 'Annual_Growth']
output = pd.merge(stockdf, resultdf)[
['Date', 'Year', 'Firm', 'Open', 'Close', 'Volume', 'Day_Growth', 'Annual_Growth']
]
# Return the output
return output
### END SOLUTION
### Demo function call
annual_growth_demo_df = calculate_annual_growth(stock_records=stock_daily_growths_reduced_df)
display(annual_growth_demo_df)
The cell below will test your solution for calculate_annual_growth (exercise 3). The testing variables will be available for debugging under the following names in a dictionary format.
- input_vars: Input variables for your solution.
- original_input_vars: Copy of input variables from prior to running your solution. Any key:value pair in original_input_vars should also exist in input_vars; otherwise, the inputs were modified by your solution.
- returned_output_vars: Outputs returned by your solution.
- true_output_vars: The expected output. This should "match" returned_output_vars based on the question requirements; otherwise, your solution is not returning the correct output.
### Test Cell - Exercise 3
# Load testing utility
with open('resource/asnlib/publicdata/execute_tests', 'rb') as f:
execute_tests = dill.load(f)
# Execute test
passed, test_case_vars = execute_tests(func=plugins.sqlite_blocker(calculate_annual_growth),
ex_name='calculate_annual_growth',
key=b'a9pptsbNJew33Kjoo3e2RkFk0FT2N1lXclu3U9zq8Aw=',
n_iter=100)
# Assign test case vars for debugging
input_vars, original_input_vars, returned_output_vars, true_output_vars = test_case_vars
assert passed, 'The solution to calculate_annual_growth did not pass the test.'
### BEGIN HIDDEN TESTS
passed, test_case_vars = execute_tests(func=plugins.sqlite_blocker(calculate_annual_growth),
ex_name='calculate_annual_growth',
key=b'deVhBT3kO4xkGnbJLP5zPloS7UHLzunkF-JOvkYVvhI=',
n_iter=10,
hidden=True)
input_vars, original_input_vars, returned_output_vars, true_output_vars = test_case_vars
assert passed, 'The solution to calculate_annual_growth did not pass the test.'
### END HIDDEN TESTS
print('Passed! Please submit.')
### Run Me!!!
annual_growth_df = utils.load_object_from_publicdata('annual_growth_df')
golden_cross_strategy
Your task: define golden_cross_strategy as follows:
Implement a function which calculates a Golden Cross strategy, as detailed below.
Inputs:
- stockdf: a Pandas DataFrame. It contains the results calculated by exercise 3.
- firm: a string. It specifies which firm to calculate the golden cross results for.
- short_average: an integer. It defines the window for the short moving average. The number represents the number of days to include in the window.
- long_average: an integer. It defines the window for the long moving average. The number represents the number of days to include in the window.
Return: golden_cross_results: a Pandas DataFrame with all of the columns from stockdf, plus:
- Short_Average
- Long_Average
- Golden_Cross
Requirements/steps:
- Filter the data to the firm given by the firm parameter.
- Some Close values are recorded as NaN. These should be filtered out before calculating the moving averages.
- Append Short_Average and Long_Average to the Pandas DataFrame. Each is a rolling mean of the Close column, with window sizes given by the short_average and long_average arguments.
- Golden_Cross is a boolean column indicating whether Short_Average is greater than Long_Average.
- You may assume stockdf is already ordered by date in ascending order.
Notes:
The Golden Cross (https://www.investopedia.com/terms/g/goldencross.asp) is a technical analysis pattern that indicates a potential bullish trend reversal. It occurs when a faster, short-term moving average crosses above a slower, longer-term moving average. For example, a 50-day moving average (the faster MA) crossing above a 200-day moving average (the slower MA) creates a Golden Cross signal.
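The rolling-mean comparison can be previewed on a short, hypothetical price series (made-up numbers, with illustrative 2- and 3-day windows):

```python
import pandas as pd

# Hypothetical closing prices, already in date order
close = pd.Series([10.0, 12.0, 14.0, 13.0, 11.0])
short = close.rolling(2).mean()   # NaN until the 2-day window fills
long_ = close.rolling(3).mean()   # NaN until the 3-day window fills
golden = short > long_            # comparisons involving NaN yield False
print(golden.tolist())  # [False, False, True, True, False]
```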
Example. A correct implementation should produce, for the demo, the following output. Run the cell below to view it.
print("Example Golden Cross Demo:")
golden_cross_demo_df_CORRECT = utils.load_object_from_publicdata('golden_cross_demo_df.dill')
display(golden_cross_demo_df_CORRECT.head(15))
### Solution - Exercise 4
def golden_cross_strategy(stockdf: pd.DataFrame, firm: str, short_average: int, long_average: int) -> pd.DataFrame:
### BEGIN SOLUTION
    # Copy the inputs and filter where necessary
stockdf = stockdf.copy(deep=True)
stockdf = stockdf[stockdf['Firm'] == firm]
stockdf = stockdf[stockdf.notna()['Close']]
# Calculate the moving averages
stockdf['Short_Average'] = stockdf['Close'].rolling(short_average).mean()
stockdf['Long_Average'] = stockdf['Close'].rolling(long_average).mean()
# Calculate the boolean indicator
stockdf['Golden_Cross'] = stockdf['Short_Average'] > stockdf['Long_Average']
# Return the result
return stockdf.reset_index(drop=True)
### END SOLUTION
### Demo function call
golden_cross_demo_df = golden_cross_strategy(
stockdf=annual_growth_df,
firm='NFLX',
short_average=3,
long_average=7)
# Let's only look at the first 15 days for our input
display(golden_cross_demo_df.head(15))
The cell below will test your solution for golden_cross_strategy (exercise 4). The testing variables will be available for debugging under the following names in a dictionary format.
- input_vars: Input variables for your solution.
- original_input_vars: Copy of input variables from prior to running your solution. Any key:value pair in original_input_vars should also exist in input_vars; otherwise, the inputs were modified by your solution.
- returned_output_vars: Outputs returned by your solution.
- true_output_vars: The expected output. This should "match" returned_output_vars based on the question requirements; otherwise, your solution is not returning the correct output.
### Test Cell - Exercise 4
# Load testing utility
with open('resource/asnlib/publicdata/execute_tests', 'rb') as f:
execute_tests = dill.load(f)
# Execute test
passed, test_case_vars = execute_tests(func=plugins.sqlite_blocker(golden_cross_strategy),
ex_name='golden_cross_strategy',
key=b'a9pptsbNJew33Kjoo3e2RkFk0FT2N1lXclu3U9zq8Aw=',
n_iter=100)
# Assign test case vars for debugging
input_vars, original_input_vars, returned_output_vars, true_output_vars = test_case_vars
assert passed, 'The solution to golden_cross_strategy did not pass the test.'
### BEGIN HIDDEN TESTS
passed, test_case_vars = execute_tests(func=plugins.sqlite_blocker(golden_cross_strategy),
ex_name='golden_cross_strategy',
key=b'deVhBT3kO4xkGnbJLP5zPloS7UHLzunkF-JOvkYVvhI=',
n_iter=10,
hidden=True)
input_vars, original_input_vars, returned_output_vars, true_output_vars = test_case_vars
assert passed, 'The solution to golden_cross_strategy did not pass the test.'
### END HIDDEN TESTS
print('Passed! Please submit.')
### Run Me!!!
golden_crosses_df = utils.load_object_from_publicdata('golden_crosses_df')
golden_crosses_reduced_df = utils.load_object_from_publicdata('golden_crosses_reduced_df')
normalize_stock_growths
Example: we have defined normalize_stock_growths as follows:
Normalize the daily growth of a given firm by comparing it to the growth of the other provided firms.
Inputs:
- daily_growth_by_firm: a Pandas DataFrame. It contains many of the variables we have already calculated.
- firm: a string. It specifies which firm to calculate the Relative_Growth for.
Return: normalized_results: a Pandas DataFrame with the following columns:
- Date, which contains every unique date value in the input.
- One column per firm in daily_growth_by_firm, where the value is equal to the growth of that firm's stock for the relevant Date. These values come from the Day_Growth column of daily_growth_by_firm.
- Non_<FIRM>_Average_Growth (as defined below)
Requirements/steps:
- To calculate Non_<FIRM>_Average_Growth: for each Date, take the mean of the growths for every firm which is not equal to firm.
- For example, if the firms are AAPL, NFLX, and AMZN, and firm=NFLX, then Non_<FIRM>_Average_Growth would be the average growth of AAPL and AMZN for each day.
- Replace "<FIRM>" in the column name with the value given by firm. For example, if firm='AAPL', then your column name should be Non_AAPL_Average_Growth.
Example. A correct implementation should produce, for the demo, the following output. Run the cell below to view it.
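The pivot-then-average pattern used in the provided solution can be sketched on a toy long-format frame (the dates and growth values are made up for illustration):

```python
import pandas as pd

# Toy long-format daily growths for three firms over two dates
long_df = pd.DataFrame({
    'Date': ['d1', 'd1', 'd1', 'd2', 'd2', 'd2'],
    'Firm': ['AAPL', 'NFLX', 'AMZN'] * 2,
    'Day_Growth': [0.01, 0.03, 0.05, -0.02, 0.00, 0.04],
})
# Pivot to one column per firm, then average the columns excluding NFLX
wide = long_df.pivot(index='Date', columns='Firm', values='Day_Growth').reset_index()
others = [f for f in ['AAPL', 'NFLX', 'AMZN'] if f != 'NFLX']
wide['Non_NFLX_Average_Growth'] = wide[others].mean(axis=1)
print(wide)
```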
print('Properly Averaged Industry Growth:')
normalized_returns_demo_df_CORRECT = utils.load_object_from_publicdata('normalized_returns_demo_df.dill')
display(normalized_returns_demo_df_CORRECT)
### Solution - Exercise 5
def normalize_stock_growths(daily_growth_by_firm: pd.DataFrame, firm: str) -> pd.DataFrame:
# Create a list of every firm, except for the one we specified in the inputs
non_chosen_firms = set(daily_growth_by_firm['Firm']) - set([firm])
non_chosen_firms = list(non_chosen_firms)
# Pivot the data for easy manipulation
pivoted_data = daily_growth_by_firm.pivot(
index='Date',
columns='Firm',
values='Day_Growth'
).reset_index()
# Calculate the total growth for every firm that isn't our firm
pivoted_data[f'Non_{firm}_Average_Growth'] = pivoted_data[non_chosen_firms]\
.sum(axis=1) / len(non_chosen_firms)
return pivoted_data
### Demo function call
normalized_returns_demo_df = normalize_stock_growths(golden_crosses_reduced_df, 'NFLX')
display(normalized_returns_demo_df)
The test cell below will always pass. Please submit to collect your free points for normalize_stock_growths (exercise 5).
### Test Cell - Exercise 5
print('Passed! Please submit.')
summarize_netflix_original_genres
Your task: define summarize_netflix_original_genres as follows:
Define the function to return a SQL query which summarizes the trends of Netflix originals by year and genre, as detailed below.
Inputs: None
Return: query: a Python string, containing a SQLite query. It should query the database to obtain a table with three columns:
- year
- genre
- genre_count
Requirements/steps:
- The year variable should be extracted from the premiere variable. Note that in this case, year will be a string. We suggest you use the STRFTIME SQLite function to extract year from the premiere variable.
- Order the results by genre_count (descending), then year (ascending), then genre (ascending).
- Query the table nflx_originals.
Example. A correct implementation should produce, for the demo, the following output. Run the cell below to view it.
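Here is a minimal, self-contained illustration of SQLite's STRFTIME on an in-memory table (the table name and values are made up; this is not the exam database):

```python
import sqlite3

# In-memory demo of extracting a year string with STRFTIME
conn_demo = sqlite3.connect(':memory:')
conn_demo.execute("CREATE TABLE shows (title TEXT, premiere TEXT)")
conn_demo.executemany("INSERT INTO shows VALUES (?, ?)",
                      [('A', '2019-07-04'), ('B', '2020-01-15')])
rows = conn_demo.execute(
    "SELECT title, STRFTIME('%Y', premiere) AS year FROM shows ORDER BY title"
).fetchall()
print(rows)  # [('A', '2019'), ('B', '2020')]
conn_demo.close()
```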
print('A proper summary of genres for the demo:')
netflix_original_genre_summary_df_CORRECT = utils.load_object_from_publicdata('netflix_original_genre_summary_df.dill')
display(netflix_original_genre_summary_df_CORRECT)
### Solution - Exercise 6
def summarize_netflix_original_genres()-> str:
### BEGIN SOLUTION
query = '''
SELECT
STRFTIME("%Y", premiere) AS year,
genre,
COUNT(genre) AS genre_count
FROM nflx_originals no2
GROUP BY genre, year
ORDER BY
genre_count DESC,
year,
genre
LIMIT 10
'''
return query
### END SOLUTION
### Demo function call
summary_query = summarize_netflix_original_genres()
netflix_original_genres = pd.read_sql(summary_query, conn)
display(netflix_original_genres)
The cell below will test your solution for summarize_netflix_original_genres (exercise 6). The testing variables will be available for debugging under the following names in a dictionary format.
- input_vars: Input variables for your solution.
- original_input_vars: Copy of input variables from prior to running your solution. Any key:value pair in original_input_vars should also exist in input_vars; otherwise, the inputs were modified by your solution.
- returned_output_vars: Outputs returned by your solution.
- true_output_vars: The expected output. This should "match" returned_output_vars based on the question requirements; otherwise, your solution is not returning the correct output.
### Test Cell - Exercise 6
# Load testing utility
with open('resource/asnlib/publicdata/execute_tests', 'rb') as f:
execute_tests = dill.load(f)
# Execute test
passed, test_case_vars = execute_tests(func=plugins.sql_executor(summarize_netflix_original_genres),
ex_name='summarize_netflix_original_genres',
key=b'a9pptsbNJew33Kjoo3e2RkFk0FT2N1lXclu3U9zq8Aw=',
n_iter=100)
# Assign test case vars for debugging
input_vars, original_input_vars, returned_output_vars, true_output_vars = test_case_vars
assert passed, 'The solution to summarize_netflix_original_genres did not pass the test.'
### BEGIN HIDDEN TESTS
passed, test_case_vars = execute_tests(func=plugins.sql_executor(summarize_netflix_original_genres),
ex_name='summarize_netflix_original_genres',
key=b'deVhBT3kO4xkGnbJLP5zPloS7UHLzunkF-JOvkYVvhI=',
n_iter=10,
hidden=True)
input_vars, original_input_vars, returned_output_vars, true_output_vars = test_case_vars
assert passed, 'The solution to summarize_netflix_original_genres did not pass the test.'
### END HIDDEN TESTS
print('Passed! Please submit.')
calc_netflix_top10_scores
Your task: define calc_netflix_top10_scores as follows:
Define the function to return a SQL query which calculates the number of times each show appeared in the top 10, the show's score, and the show's average score when it was in the top 10. The concept of a 'score' is defined below.
Inputs: None
Return: query: a Python string, containing a SQLite query. It should query the database table nflx_top to obtain a table with the following attributes:
- title: the title of the show
- total_score: the sum of the show's scores.
- occurrence: the number of times a show appeared in the top 10.
- avg_score: the total_score divided by the occurrence value for a given show.
Requirements/steps:
- Calculate avg_score to produce a floating-point value.
- Order by total_score in descending order, then by title in descending order.
Hints:
Example. A correct implementation should produce, for the demo, the following output. Run the cell below to view it.
print('A proper summary of show score metrics:')
netflix_top_10_scores_df_CORRECT = utils.load_object_from_publicdata('netflix_top_10_scores_df.dill')
display(netflix_top_10_scores_df_CORRECT)
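The score aggregation (11 minus the rank, summed per title) can be checked on a toy in-memory table (the table name and values below are made up for illustration):

```python
import sqlite3

# Toy top-10 table: each appearance is worth (11 - rank) points
conn_demo = sqlite3.connect(':memory:')
conn_demo.execute("CREATE TABLE nflx_top_demo (title TEXT, rank INTEGER)")
conn_demo.executemany("INSERT INTO nflx_top_demo VALUES (?, ?)",
                      [('X', 1), ('X', 3), ('Y', 10)])
rows = conn_demo.execute("""
    SELECT title,
           SUM(11 - rank) AS total_score,
           COUNT(title)   AS occurrence,
           1.0 * SUM(11 - rank) / COUNT(title) AS avg_score
    FROM nflx_top_demo
    GROUP BY title
    ORDER BY total_score DESC, title DESC
""").fetchall()
print(rows)  # [('X', 18, 2, 9.0), ('Y', 1, 1, 1.0)]
conn_demo.close()
```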
### Solution - Exercise 7
def calc_netflix_top10_scores() -> str:
### BEGIN SOLUTION
    # An approach using CTEs
ctequery = '''
WITH scores AS (
SELECT
sum(11 - rank) AS total_score,
count(title) AS occurrence,
title
FROM nflx_top
GROUP BY title
ORDER BY total_score DESC, title DESC)
SELECT
title,
total_score,
occurrence,
(total_score * 1.0 / occurrence) as avg_score
FROM scores
LIMIT 10;
'''
    # An approach without using CTEs
query='''
SELECT title,
sum(11 - rank) AS total_score,
count(title) AS occurrence,
1.0*sum(11 - rank)/count(title) avg_score
FROM nflx_top
GROUP BY title
ORDER BY total_score DESC, title DESC
LIMIT 10
'''
return ctequery
### END SOLUTION
### Demo function call
scores_query = calc_netflix_top10_scores()
netflix_top10_scores = pd.read_sql(scores_query, conn)
display(netflix_top10_scores)
The cell below will test your solution for calc_netflix_top10_scores (exercise 7). The testing variables will be available for debugging under the following names in a dictionary format.
- input_vars: Input variables for your solution.
- original_input_vars: Copy of input variables from prior to running your solution. Any key:value pair in original_input_vars should also exist in input_vars; otherwise, the inputs were modified by your solution.
- returned_output_vars: Outputs returned by your solution.
- true_output_vars: The expected output. This should "match" returned_output_vars based on the question requirements; otherwise, your solution is not returning the correct output.
### Test Cell - Exercise 7
# Load testing utility
with open('resource/asnlib/publicdata/execute_tests', 'rb') as f:
execute_tests = dill.load(f)
# Execute test
passed, test_case_vars = execute_tests(func=plugins.sql_executor(calc_netflix_top10_scores),
ex_name='calc_netflix_top10_scores',
key=b'a9pptsbNJew33Kjoo3e2RkFk0FT2N1lXclu3U9zq8Aw=',
n_iter=100)
# Assign test case vars for debugging
input_vars, original_input_vars, returned_output_vars, true_output_vars = test_case_vars
assert passed, 'The solution to calc_netflix_top10_scores did not pass the test.'
### BEGIN HIDDEN TESTS
passed, test_case_vars = execute_tests(func=plugins.sql_executor(calc_netflix_top10_scores),
ex_name='calc_netflix_top10_scores',
key=b'deVhBT3kO4xkGnbJLP5zPloS7UHLzunkF-JOvkYVvhI=',
n_iter=10,
hidden=True)
input_vars, original_input_vars, returned_output_vars, true_output_vars = test_case_vars
assert passed, 'The solution to calc_netflix_top10_scores did not pass the test.'
### END HIDDEN TESTS
print('Passed! Please submit.')
longterm_avg_score
Your task: define longterm_avg_score as follows:
Define the function to return a SQL query which calculates the long-term score for each Netflix Original, as defined below.
Inputs: None
Return: query: a Python string, containing a SQLite query. It should query the database to obtain a table with the following attributes:
title - (string): the titles of the Netflix Originals
total_score - (int): the sum of all scores for each title given in nflx_originals
date_count - (int): the number of dates that a given title is available, as detailed below
longterm_avg_score - (float): the long-term average score, defined as: longterm_avg_score = total_score/date_count
Requirements/steps:
Use the nflx_originals and nflx_top tables.
Calculate a score for each title on a given date, where the score is defined as: score = 11 - rank.
For date_count, count the number of date values in nflx_top that exist for a title, where the date is greater than or equal to the premiere for that title.
<distinct_dates>: Identify all distinct date values in nflx_top.
<title_dates>: For each title in nflx_originals, identify any dates in <distinct_dates> which are greater than or equal to the title's premiere.
<title_date_score>: For each title/date pair in <title_dates>, identify any records from nflx_top which have the same title/date pair. Use the rank field from nflx_top to calculate a given score. Title/date pairs in <title_dates> that do not appear in nflx_top should not be dropped and should have a NULL score value in <title_date_score>.
From <title_date_score>, calculate:
total_score - compute the sum of all the score values sharing a common title.
date_count - compute the count of all date values sharing a common title.
longterm_avg_score - compute the quotient of the total_score over date_count.
Exclude any rows with a longterm_avg_score that is NULL.
Sort by longterm_avg_score in descending order, then by title in ascending order.
Hints:
An Example:
Let's take a deeper look specifically at the show Vikings: Valhalla.
Per the nflx_originals table, the show premiered on 2022-02-25.
longterm_avg_vikings_breakout_df_CORRECT below uses the results from the nflx_top table for the show titled Vikings: Valhalla.
In nflx_top for Vikings: Valhalla, the dates greater than or equal to the premiere date range from 2022-02-25 to 2022-03-11.
The show did not appear in the top 10 on its premiere date, so Vikings: Valhalla does not have a score for that date. Vikings: Valhalla received its first ranking on 2022-02-26. Therefore, its score for 2022-02-26 would be:
$$(11-rank)=(11-4)=7$$
longterm_avg_vikings_breakout_df_CORRECT below shows a table with the show title, premiere, date, rank, and score for all dates in the range from 2022-02-25 to 2022-03-11 for Vikings: Valhalla.
Computing total_score, date_count, and longterm_avg_score for the show Vikings: Valhalla:
$$\text{longterm_avg_score}=\text{total_score}/\text{date_count}=100/15=6.666667$$
longterm_avg_vikings_breakout_df_CORRECT = utils.load_object_from_publicdata('longterm_avg_vikings_breakout_df.dill')
display(longterm_avg_vikings_breakout_df_CORRECT)
Example. A correct implementation should produce, for the demo, the following output. Run the cell below to view it.
longterm_avg_scores_df = utils.load_object_from_publicdata('longterm_avg_scores_df.dill')
display(longterm_avg_scores_df)
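To see the NULL-preserving LEFT JOIN logic in isolation, here is a small sketch against an in-memory SQLite database with made-up rows. The table and CTE names match the exercise, but the data is purely illustrative:

```python
import sqlite3

# Toy database: one Original ('Show A'), plus a top-10 row for a show
# ('Show B') that is not an Original but contributes a distinct date.
tmp = sqlite3.connect(':memory:')
tmp.executescript('''
    CREATE TABLE nflx_originals (title TEXT, premiere TEXT);
    CREATE TABLE nflx_top (title TEXT, date TEXT, rank INTEGER);
    INSERT INTO nflx_originals VALUES ('Show A', '2022-01-01');
    INSERT INTO nflx_top VALUES
        ('Show A', '2022-01-01', 4),
        ('Show B', '2022-01-02', 1);
''')
rows = tmp.execute('''
    WITH distinct_dates AS (SELECT DISTINCT date FROM nflx_top),
    title_dates AS (
        SELECT no.title, dd.date
        FROM nflx_originals no
        LEFT JOIN distinct_dates dd ON no.premiere <= dd.date),
    title_date_score AS (
        SELECT td.title, td.date, (11 - nt.rank) AS score
        FROM title_dates td
        LEFT JOIN nflx_top nt
          ON nt.title = td.title AND nt.date = td.date)
    SELECT title,
           SUM(score) AS total_score,
           COUNT(date) AS date_count,
           SUM(score) * 1.0 / COUNT(date) AS longterm_avg_score
    FROM title_date_score
    GROUP BY title
''').fetchall()
print(rows)
```

Here 'Show A' scores 7 on its premiere date and has a NULL score on 2022-01-02; SUM ignores the NULL while COUNT(date) still counts both dates, so the long-term average is 7/2 = 3.5. This is exactly why the unmatched title/date pairs must not be dropped.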
### Solution - Exercise 8
def longterm_avg_score() -> str:
### BEGIN SOLUTION
# An approach using CTEs
ctequery='''
WITH
distinct_dates as (
SELECT DISTINCT date FROM nflx_top),
title_dates as (
SELECT no.title, no.premiere, dd.date
FROM nflx_originals no
LEFT JOIN distinct_dates dd
ON no.premiere <= dd.date),
title_date_score as (
SELECT td.title, td.date,
(11 - nt.rank) score
FROM title_dates td
LEFT JOIN nflx_top nt
ON nt.title = td.title AND nt.date = td.date)
--
-- Main Query
--
SELECT title,
sum(score) total_score,
count(date) date_count,
sum(score) * 1.0 / count(date) longterm_avg_score
FROM title_date_score
GROUP BY title
HAVING longterm_avg_score is not null
ORDER BY longterm_avg_score DESC, title
LIMIT 10
'''
# An approach without using CTEs
query = '''
SELECT
n_orig.title,
SUM(11 - n_top.rank) total_score,
COUNT(n_orig.title) date_count,
1.0 * SUM(11 - n_top.rank) / COUNT(n_orig.title) AS longterm_avg_score
FROM nflx_originals n_orig
LEFT JOIN (
SELECT
DISTINCT date
FROM nflx_top
) all_dates
ON n_orig.premiere <= all_dates.date
LEFT JOIN nflx_top n_top
ON n_top.title = n_orig.title
AND n_top.date = all_dates.date
GROUP BY n_orig.title
HAVING longterm_avg_score is not null
ORDER BY
longterm_avg_score DESC,
n_orig.title
LIMIT 10
'''
return query
### END SOLUTION
### Demo function call
query = longterm_avg_score()
longterm_avg_scores = pd.read_sql(query, conn)
display(longterm_avg_scores)
The cell below will test your solution for longterm_avg_score (exercise 8). The testing variables will be available for debugging under the following names in a dictionary format.
input_vars - Input variables for your solution.
original_input_vars - Copy of input variables from prior to running your solution. Any key:value pair in original_input_vars should also exist in input_vars - otherwise the inputs were modified by your solution.
returned_output_vars - Outputs returned by your solution.
true_output_vars - The expected output. This should "match" returned_output_vars based on the question requirements - otherwise, your solution is not returning the correct output.
### Test Cell - Exercise 8
# Load testing utility
with open('resource/asnlib/publicdata/execute_tests', 'rb') as f:
execute_tests = dill.load(f)
# Execute test
passed, test_case_vars = execute_tests(func=plugins.sql_executor(longterm_avg_score),
ex_name='longterm_avg_score',
key=b'a9pptsbNJew33Kjoo3e2RkFk0FT2N1lXclu3U9zq8Aw=',
n_iter=100)
# Assign test case vars for debugging
input_vars, original_input_vars, returned_output_vars, true_output_vars = test_case_vars
assert passed, 'The solution to longterm_avg_score did not pass the test.'
### BEGIN HIDDEN TESTS
passed, test_case_vars = execute_tests(func=plugins.sql_executor(longterm_avg_score),
ex_name='longterm_avg_score',
key=b'deVhBT3kO4xkGnbJLP5zPloS7UHLzunkF-JOvkYVvhI=',
n_iter=10,
hidden=True)
input_vars, original_input_vars, returned_output_vars, true_output_vars = test_case_vars
assert passed, 'The solution to longterm_avg_score did not pass the test.'
### END HIDDEN TESTS
print('Passed! Please submit.')
### Run Me!!!
nflx_scores_demo_df = utils.load_object_from_publicdata('nflx_scores_demo_df')
normalized_growths_df = utils.load_object_from_publicdata('normalized_growths_df')
construct_model_inputs
Your task: define construct_model_inputs as follows:
Define the model matrix and response for our regression model, as shown below.
Inputs:
normalized_growths_df: a Pandas DataFrame. It contains the results from exercise 5.
show_scores_df: a Pandas DataFrame. It contains the pre-computed scores for each show on a given date.
Return: model_matrix: a Pandas DataFrame containing the independent variables (show scores) and response variable (Relative_Growth)
Requirements/steps:
Pivot show_scores_df so that each show is represented as its own column. The index should reflect the dates. The values should be set to the show's score.
Merge the pivoted scores with normalized_growths_df to form model_matrix.
Example. A correct implementation should produce, for the demo, the following output. Run the cell below to view it.
print('A proper model matrix for the demo:')
model_matrix_demo_df_CORRECT = utils.load_object_from_publicdata('model_matrix_demo_df.dill')
display(model_matrix_demo_df_CORRECT)
### Solution - Exercise 9
def construct_model_inputs(normalized_growths_df: pd.DataFrame, show_scores_df: pd.DataFrame) -> pd.DataFrame:
### BEGIN SOLUTION
show_scores_filled = show_scores_df.pivot(index='date', columns='title', values='score').fillna(0)
all_shows = show_scores_filled.columns
model_matrix = show_scores_filled.merge(normalized_growths_df, left_on='date', right_on='Date')
return model_matrix[['Relative_Growth', *all_shows]]
### END SOLUTION
### Demo function call
model_matrix = construct_model_inputs(normalized_growths_df, nflx_scores_demo_df)
display(model_matrix)
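The pivot-then-merge pattern can be sketched on made-up frames. The column names mirror the exercise, but the values below are illustrative, not the real inputs:

```python
import pandas as pd

# Hypothetical long-format show scores (one row per title/date pair)
toy_scores = pd.DataFrame({
    'date':  ['2022-01-01', '2022-01-01', '2022-01-02'],
    'title': ['Show A', 'Show B', 'Show A'],
    'score': [7, 10, 9],
})
# Hypothetical normalized growth data (one row per date)
toy_growth = pd.DataFrame({
    'Date': ['2022-01-01', '2022-01-02'],
    'Relative_Growth': [0.01, -0.02],
})

# One column per show, indexed by date; missing title/date pairs become 0
wide = toy_scores.pivot(index='date', columns='title', values='score').fillna(0)

# Attach the response; 'date' here is the pivoted frame's index level name
mm = wide.merge(toy_growth, left_on='date', right_on='Date')
mm = mm[['Relative_Growth', *wide.columns]]
print(mm)
```

Note that `merge` accepts an index level name for `left_on`, which is what lets the pivoted frame join on its date index without an explicit `reset_index()`.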
The cell below will test your solution for construct_model_inputs (exercise 9). The testing variables will be available for debugging under the following names in a dictionary format.
input_vars - Input variables for your solution.
original_input_vars - Copy of input variables from prior to running your solution. Any key:value pair in original_input_vars should also exist in input_vars - otherwise the inputs were modified by your solution.
returned_output_vars - Outputs returned by your solution.
true_output_vars - The expected output. This should "match" returned_output_vars based on the question requirements - otherwise, your solution is not returning the correct output.
### Test Cell - Exercise 9
# Load testing utility
with open('resource/asnlib/publicdata/execute_tests', 'rb') as f:
execute_tests = dill.load(f)
# Execute test
passed, test_case_vars = execute_tests(func=construct_model_inputs,
ex_name='construct_model_inputs',
key=b'a9pptsbNJew33Kjoo3e2RkFk0FT2N1lXclu3U9zq8Aw=',
n_iter=100)
# Assign test case vars for debugging
input_vars, original_input_vars, returned_output_vars, true_output_vars = test_case_vars
assert passed, 'The solution to construct_model_inputs did not pass the test.'
### BEGIN HIDDEN TESTS
passed, test_case_vars = execute_tests(func=construct_model_inputs,
ex_name='construct_model_inputs',
key=b'deVhBT3kO4xkGnbJLP5zPloS7UHLzunkF-JOvkYVvhI=',
n_iter=10,
hidden=True)
input_vars, original_input_vars, returned_output_vars, true_output_vars = test_case_vars
assert passed, 'The solution to construct_model_inputs did not pass the test.'
### END HIDDEN TESTS
print('Passed! Please submit.')
If you have made it this far, congratulations! You are done. Don't forget to submit the assignment!
If you run the regression model below with all of the data collected, you would have obtained summary model results like the following:
##############################################
### The code below is provided for illustration and reproducibility but is commented out due to the additional processing time it would add to the auto-grader. Note, the model was run on all NFLX growth data and top-shows data.
##############################################
# Let's use step-wise regression to do feature selection
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import SequentialFeatureSelector
import statsmodels.api as sm
mm = model_matrix.copy(deep=True)
y = mm['Relative_Growth']
del mm['Relative_Growth']
X = mm
model = LinearRegression()
feature_selector = SequentialFeatureSelector(model, n_features_to_select=min(len(X.columns)-1, 10))
feature_selector.fit(X, y)
selected_X = X[feature_selector.get_feature_names_out()]
selected_X = sm.add_constant(selected_X)
print(selected_X.columns)
selected_model = sm.OLS(y, selected_X)
results = selected_model.fit()
results.summary()
So, how does our model do?
Step-wise regression is a greedy algorithm that iteratively selects the features that most improve the performance of our model. We tried to find the top-10 shows that were most predictive of the normalized stock values and found the following shows:
Note that these don't necessarily mean that they caused the stock value to improve. For example, The Royal Treatment was significantly associated with a negative trend in Netflix's stock growth.
A better question might be: "how reliable are these results?"
Ultimately, the performance of the model constructed from the input provided by exercise 9 is relatively poor. This model exhibits an adjusted $R^2$ value of only 0.042. Predicting stock prices is intrinsically hard, as there is an exceptionally high amount of variance in each observation and there are many reasons for fluctuations in market prices. Therefore, it is unsurprising to see that a lot of the variance remains.
We are also fighting "the curse of dimensionality," which means the excess data we have can be more of a hindrance than a help. However, the F-statistic for our model is quite good (with a p-value of 0.000684), which suggests that our model does a much better job of predicting stock value changes than a naive model.
You will learn more about various modeling techniques and how to use them during the rest of the course. Working to improve the regression results with the results of this exam is left as an exercise to the students; for high-quality regression outputs, you may find the statsmodels package particularly useful. Here's one approach you might find worth exploring: