import pandas as pd
# df = pd.read_csv('salaries_by_college_major.csv')
# print(df.head())
# print(df.shape)
# print(df.tail())
# print(df.columns)
# print(df.isna())
# print(df.tail())
# clean_df = df.dropna()
# print(clean_df.tail())
# print(clean_df['Undergraduate Major'])
# print(clean_df['Starting Median Salary'])
# print(clean_df['Starting Median Salary'].max())
# print(clean_df['Starting Median Salary'].min())
# print(clean_df['Starting Median Salary'].idxmax())
# print(clean_df['Starting Median Salary'].idxmin())
# print(clean_df['Starting Median Salary'][43])
# highest = clean_df.sort_values(['Starting Median Salary'], ascending=True)
# print(highest['Starting Median Salary'])
# difference = clean_df['Mid-Career 90th Percentile Salary'] - clean_df['Mid-Career 10th Percentile Salary']
# clean_df.insert(1, 'Spread', difference)
# print(clean_df.head())
# print(clean_df.groupby('Group').count())
# Load QueryResults.csv. header=0 consumes the file's own header row and
# `names` replaces it with the three column labels used below.
df = pd.read_csv('QueryResults.csv', names=['DATE', 'TAG', 'POSTS'], header=0)
print(df.head())  # call head(); printing the bare method dumps its repr instead

# Per-tag totals and per-tag row counts. numeric_only=True keeps the sum to
# numeric columns so the still-textual DATE column is not aggregated
# (assumes POSTS is numeric -- TODO confirm against the CSV).
print(df.groupby('TAG').sum(numeric_only=True))
print(df.groupby('TAG').count())

# Show one DATE value and its type before the conversion...
print(df['DATE'][1])
print(type(df['DATE'][1]))

# ...then parse the whole column into datetimes and show the same cell again.
df['DATE'] = pd.to_datetime(df['DATE'])
print(df.head())
print(type(df['DATE'][1]))
# Toy long-format table: one row per (Age, Actor) pair with a Power score.
# Both age groups list the same four actors, so the pivot below is dense.
test_df = pd.DataFrame({
    'Age': ['Young', 'Young', 'Young', 'Young', 'Old', 'Old', 'Old', 'Old'],
    'Actor': [
        'Jack', 'Arnold', 'Keanu', 'Sylvester',
        'Jack', 'Arnold', 'Keanu', 'Sylvester',
    ],
    'Power': [100, 80, 25, 50, 99, 75, 5, 30],
})

# Reshape to wide form: one row per Age, one column per Actor, Power as cells.
pivoted_df = test_df.pivot(index='Age', columns='Actor', values='Power')

# Print the input and the reshaped result so the two layouts can be compared.
print(test_df)
print(pivoted_df)
import matplotlib.pyplot as plt

# Wide table: one row per DATE, one column per TAG, POSTS as the cell values.
# (DATE, TAG) pairs absent from df come out of the pivot as NaN; store 0 instead.
reshaped_df = df.pivot(index='DATE', columns='TAG', values='POSTS').fillna(0)

# Draft 1: plot the 'java' column on pyplot's implicit default figure.
plt.plot(reshaped_df.index, reshaped_df['java'])

# Draft 2: the same line on a new, larger figure.
plt.figure(figsize=(16, 10))
plt.plot(reshaped_df.index, reshaped_df['java'])

# Draft 3: larger figure with bigger tick labels, axis labels and a fixed
# y-range. Call order is kept as-is: the tick font size is set before plotting.
plt.figure(figsize=(16, 10))
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.plot(reshaped_df.index, reshaped_df['java'])
plt.xlabel('Date', fontsize=14)
plt.ylabel('Number of Posts', fontsize=14)
plt.ylim(0, 300)

# Outside an inline/interactive backend nothing is drawn until show() runs.
plt.show()
# Small sample table with repeated names, used to demonstrate value_counts().
data = {
    'Name': ['John', 'Mike', 'Sara', 'Mike', 'John', 'Sara'],
    'Age': [25, 30, 22, 30, 27, 22],
    'Gender': ['M', 'M', 'F', 'M', 'M', 'F'],
}
df = pd.DataFrame(data)

# How many rows carry each distinct value of the 'Name' column.
name_counts = df['Name'].value_counts()
print(name_counts)

# Left disabled: set_theme_count and themes are not defined in the visible
# part of this file.
# merged_df = pd.merge(set_theme_count, themes, on='id')

# Summary statistics for the frame.
print(df.describe())
# Small sample table for the threshold check below.
data = {
    'Name': ['John', 'Mike', 'Sara', 'Mike', 'John', 'Sara'],
    'Age': [25, 30, 22, 30, 27, 22],
    'Gender': ['M', 'M', 'F', 'M', 'M', 'F'],
}
df = pd.DataFrame(data)

# True when at least one row has an Age strictly greater than 30. Comparing
# the Series directly gives the same answer without going through .values.
has_age_over_30 = (df['Age'] > 30).any()
print(has_age_over_30)
# Recent Pastes